summaryrefslogtreecommitdiff
path: root/sys/netinet
diff options
context:
space:
mode:
authorRyan Thomas McBride <mcbride@cvs.openbsd.org>2003-10-17 21:05:00 +0000
committerRyan Thomas McBride <mcbride@cvs.openbsd.org>2003-10-17 21:05:00 +0000
commitfaedf7a876de9ee825ed44b1f6f9dcdbe056bdcd (patch)
treefb322664c24a0866845294d2d89651bb52715ad7 /sys/netinet
parent570077e03f36a1ce16a462a69f136900e3221923 (diff)
Common Address Redundancy Protocol
Allows multiple hosts to share an IP address, providing high availability and load balancing. Based on code by mickey@, with additional help from markus@ and Marco_Pfatschbacher@genua.de ok deraadt@
Diffstat (limited to 'sys/netinet')
-rw-r--r--sys/netinet/if_ether.c53
-rw-r--r--sys/netinet/in.h10
-rw-r--r--sys/netinet/in_proto.c14
-rw-r--r--sys/netinet/ip_carp.c1107
-rw-r--r--sys/netinet/ip_carp.h124
5 files changed, 1284 insertions, 24 deletions
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c
index a2f1fd99566..a165e998e5b 100644
--- a/sys/netinet/if_ether.c
+++ b/sys/netinet/if_ether.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: if_ether.c,v 1.49 2003/09/24 21:11:34 itojun Exp $ */
+/* $OpenBSD: if_ether.c,v 1.50 2003/10/17 21:04:58 mcbride Exp $ */
/* $NetBSD: if_ether.c,v 1.31 1996/05/11 12:59:58 mycroft Exp $ */
/*
@@ -39,6 +39,7 @@
*/
#ifdef INET
+#include "carp.h"
#include "bridge.h"
@@ -59,6 +60,9 @@
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
+#if NCARP > 0
+#include <netinet/ip_carp.h>
+#endif
#define SIN(s) ((struct sockaddr_in *)s)
#define SDL(s) ((struct sockaddr_dl *)s)
@@ -89,9 +93,9 @@ int useloopback = 1; /* use loopback interface for local traffic */
int arpinit_done = 0;
/* revarp state */
-static struct in_addr myip, srv_ip;
-static int myip_initialized = 0;
-static int revarp_in_progress = 0;
+struct in_addr myip, srv_ip;
+int myip_initialized = 0;
+int revarp_in_progress = 0;
struct ifnet *myip_ifp = NULL;
#ifdef DDB
@@ -515,6 +519,7 @@ in_arpinput(m)
struct sockaddr_dl *sdl;
struct sockaddr sa;
struct in_addr isaddr, itaddr, myaddr;
+ u_int8_t *enaddr = NULL;
int op;
ea = mtod(m, struct ether_arp *);
@@ -550,6 +555,16 @@ in_arpinput(m)
m->m_pkthdr.rcvif->if_bridge == ia->ia_ifp->if_bridge)
bridge_ia = ia;
#endif
+
+#if NCARP > 0
+ if (ac->ac_if.if_carp) {
+ if (carp_iamatch(ac->ac_if.if_carp, ia,
+ &isaddr, &enaddr))
+ break;
+ else
+ goto out;
+ }
+#endif
}
#if NBRIDGE > 0
@@ -582,10 +597,11 @@ in_arpinput(m)
if (ia == NULL)
goto out;
+ if (!enaddr)
+ enaddr = ac->ac_enaddr;
myaddr = ia->ia_addr.sin_addr;
- if (!bcmp((caddr_t)ea->arp_sha, (caddr_t)ac->ac_enaddr,
- sizeof (ea->arp_sha)))
+ if (!bcmp((caddr_t)ea->arp_sha, enaddr, sizeof (ea->arp_sha)))
goto out; /* it's from me, ignore it. */
if (ETHER_IS_MULTICAST (&ea->arp_sha[0])) {
if (!bcmp((caddr_t)ea->arp_sha, (caddr_t)etherbroadcastaddr,
@@ -609,7 +625,7 @@ in_arpinput(m)
la = arplookup(isaddr.s_addr, itaddr.s_addr == myaddr.s_addr, 0);
if (la && (rt = la->la_rt) && (sdl = SDL(rt->rt_gateway))) {
if (sdl->sdl_alen) {
- if (bcmp((caddr_t)ea->arp_sha, LLADDR(sdl), sdl->sdl_alen)) {
+ if (bcmp(ea->arp_sha, LLADDR(sdl), sdl->sdl_alen)) {
if (rt->rt_flags & RTF_PERMANENT_ARP) {
log(LOG_WARNING,
"arp: attempt to overwrite permanent "
@@ -645,7 +661,7 @@ in_arpinput(m)
ac->ac_if.if_xname);
goto out;
}
- bcopy((caddr_t)ea->arp_sha, LLADDR(sdl),
+ bcopy(ea->arp_sha, LLADDR(sdl),
sdl->sdl_alen = sizeof(ea->arp_sha));
if (rt->rt_expire)
rt->rt_expire = time.tv_sec + arpt_keep;
@@ -665,30 +681,25 @@ reply:
}
if (itaddr.s_addr == myaddr.s_addr) {
/* I am the target */
- bcopy((caddr_t)ea->arp_sha, (caddr_t)ea->arp_tha,
- sizeof(ea->arp_sha));
- bcopy((caddr_t)ac->ac_enaddr, (caddr_t)ea->arp_sha,
- sizeof(ea->arp_sha));
+ bcopy(ea->arp_sha, ea->arp_tha, sizeof(ea->arp_sha));
+ bcopy(enaddr, ea->arp_sha, sizeof(ea->arp_sha));
} else {
la = arplookup(itaddr.s_addr, 0, SIN_PROXY);
if (la == 0)
goto out;
rt = la->la_rt;
- bcopy((caddr_t)ea->arp_sha, (caddr_t)ea->arp_tha,
- sizeof(ea->arp_sha));
+ bcopy(ea->arp_sha, ea->arp_tha, sizeof(ea->arp_sha));
sdl = SDL(rt->rt_gateway);
- bcopy(LLADDR(sdl), (caddr_t)ea->arp_sha, sizeof(ea->arp_sha));
+ bcopy(LLADDR(sdl), ea->arp_sha, sizeof(ea->arp_sha));
}
- bcopy((caddr_t)ea->arp_spa, (caddr_t)ea->arp_tpa, sizeof(ea->arp_spa));
- bcopy((caddr_t)&itaddr, (caddr_t)ea->arp_spa, sizeof(ea->arp_spa));
+ bcopy(ea->arp_spa, ea->arp_tpa, sizeof(ea->arp_spa));
+ bcopy(&itaddr, ea->arp_spa, sizeof(ea->arp_spa));
ea->arp_op = htons(ARPOP_REPLY);
ea->arp_pro = htons(ETHERTYPE_IP); /* let's be sure! */
eh = (struct ether_header *)sa.sa_data;
- bcopy((caddr_t)ea->arp_tha, (caddr_t)eh->ether_dhost,
- sizeof(eh->ether_dhost));
- bcopy((caddr_t)ac->ac_enaddr, (caddr_t)eh->ether_shost,
- sizeof(eh->ether_shost));
+ bcopy(ea->arp_tha, eh->ether_dhost, sizeof(eh->ether_dhost));
+ bcopy(enaddr, eh->ether_shost, sizeof(eh->ether_shost));
eh->ether_type = htons(ETHERTYPE_ARP);
sa.sa_family = AF_UNSPEC;
sa.sa_len = sizeof(sa);
diff --git a/sys/netinet/in.h b/sys/netinet/in.h
index 2a556936ce3..03dabb85914 100644
--- a/sys/netinet/in.h
+++ b/sys/netinet/in.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: in.h,v 1.57 2003/06/02 23:28:13 millert Exp $ */
+/* $OpenBSD: in.h,v 1.58 2003/10/17 21:04:58 mcbride Exp $ */
/* $NetBSD: in.h,v 1.20 1996/02/13 23:41:47 christos Exp $ */
/*
@@ -72,6 +72,7 @@
#define IPPROTO_ENCAP 98 /* encapsulation header */
#define IPPROTO_PIM 103 /* Protocol indep. multicast */
#define IPPROTO_IPCOMP 108 /* IP Payload Comp. Protocol */
+#define IPPROTO_CARP 112 /* CARP */
#define IPPROTO_RAW 255 /* raw IP packet */
#define IPPROTO_MAX 256
@@ -198,6 +199,7 @@ struct in_addr {
#define INADDR_UNSPEC_GROUP __IPADDR(0xe0000000) /* 224.0.0.0 */
#define INADDR_ALLHOSTS_GROUP __IPADDR(0xe0000001) /* 224.0.0.1 */
#define INADDR_ALLROUTERS_GROUP __IPADDR(0xe0000002) /* 224.0.0.2 */
+#define INADDR_CARP_GROUP __IPADDR(0xe0000012) /* 224.0.0.18 */
#define INADDR_MAX_LOCAL_GROUP __IPADDR(0xe00000ff) /* 224.0.0.255 */
#define IN_LOOPBACKNET 127 /* official! */
@@ -312,7 +314,7 @@ struct ip_mreq {
* Third level is protocol number.
* Fourth level is desired variable within that protocol.
*/
-#define IPPROTO_MAXID (IPPROTO_IPCOMP + 1) /* don't list to IPPROTO_MAX */
+#define IPPROTO_MAXID (IPPROTO_CARP + 1) /* don't list to IPPROTO_MAX */
#define CTL_IPPROTO_NAMES { \
{ "ip", CTLTYPE_NODE }, \
@@ -424,6 +426,10 @@ struct ip_mreq {
{ 0, 0 }, \
{ 0, 0 }, \
{ "ipcomp", CTLTYPE_NODE }, \
+ { 0, 0 }, \
+ { 0, 0 }, \
+ { 0, 0 }, \
+ { "carp", CTLTYPE_NODE }, \
}
/*
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
index a52b87aa944..2f83f8fcb65 100644
--- a/sys/netinet/in_proto.c
+++ b/sys/netinet/in_proto.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: in_proto.c,v 1.36 2003/06/02 23:28:14 millert Exp $ */
+/* $OpenBSD: in_proto.c,v 1.37 2003/10/17 21:04:58 mcbride Exp $ */
/* $NetBSD: in_proto.c,v 1.14 1996/02/18 18:58:32 christos Exp $ */
/*
@@ -176,6 +176,11 @@
#include <net/if_gre.h>
#endif
+#include "carp.h"
+#if NCARP > 0
+#include <netinet/ip_carp.h>
+#endif
+
extern struct domain inetdomain;
struct protosw inetsw[] = {
@@ -299,6 +304,13 @@ struct protosw inetsw[] = {
0, 0, 0, 0, ipmobile_sysctl
},
#endif /* NGRE > 0 */
+#if NCARP > 0
+{ SOCK_RAW, &inetdomain, IPPROTO_CARP, PR_ATOMIC|PR_ADDR,
+ carp_input, rip_output, 0, rip_ctloutput,
+ rip_usrreq,
+ 0, 0, 0, 0, carp_sysctl
+},
+#endif /* NCARP > 0 */
/* raw wildcard */
{ SOCK_RAW, &inetdomain, 0, PR_ATOMIC|PR_ADDR,
rip_input, rip_output, 0, rip_ctloutput,
diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c
new file mode 100644
index 00000000000..175102facb6
--- /dev/null
+++ b/sys/netinet/ip_carp.c
@@ -0,0 +1,1107 @@
+/* $OpenBSD: ip_carp.c,v 1.1 2003/10/17 21:04:58 mcbride Exp $ */
+
+/*
+ * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
+ * Copyright (c) 2003 Ryan McBride. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * TODO:
+ * - iface reconfigure
+ * - find a way to schednetisr() packet earlier than through inetsw[];
+ * - track iface ip address changes;
+ * - support for hardware checksum calculations;
+ * - support for inet6;
+ *
+ */
+
+#include "ether.h"
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/device.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+
+#include <machine/cpu.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_llc.h>
+#include <net/route.h>
+#include <net/netisr.h>
+
+#if NFDDI > 0
+#include <net/if_fddi.h>
+#endif
+#if NTOKEN > 0
+#include <net/if_token.h>
+#endif
+
+#include <crypto/sha1.h>
+
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip_ipsp.h>
+
+#include <net/if_enc.h>
+#endif
+
+#include "bpfilter.h"
+#if NBPFILTER > 0
+#include <net/bpf.h>
+#endif
+
+#include <netinet/ip_carp.h>
+
+/*
+ * Per-interface state of one carp(4) virtual host.  carpattach() allocates
+ * an array of these (pointed to by the global carp_softc) and carp_number
+ * records how many entries it holds.
+ */
+struct carp_softc {
+ struct arpcom sc_ac;
+ int if_flags; /* current flags to treat UP/DOWN */
+ /* physical interface chosen by carp_set_addr() */
+ struct ifnet *sc_ifp;
+ struct in_ifaddr *sc_ia; /* primary iface address */
+ /* multicast options handed to ip_output() by carp_send_ad() */
+ struct ip_moptions sc_imo;
+ /* linkage on the owning carp_if's vhif_vrs list */
+ TAILQ_ENTRY(carp_softc) sc_list;
+
+ enum { INIT = 0, BACKUP, MASTER } sc_state;
+
+ /* virtual host id; -1 until set through SIOCSVH */
+ int sc_vhid;
+ /* advertisement skew; scaled as advskew/256 s by carp_setrun() */
+ int sc_advskew;
+ /* count of addresses added via carp_set_addr() */
+ int sc_naddrs;
+ int sc_advbase; /* seconds */
+ /* non-zero until an advertisement has been accepted by carp_input() */
+ int sc_init_counter;
+ /* shared secret mixed into the SHA1 digest */
+ unsigned char sc_key[CARP_KEY_LEN];
+ /* 64-bit advertisement counter carried in carp_counter[] */
+ u_int64_t sc_counter;
+
+ struct timeout sc_ad_tmo; /* advertisement timeout */
+ struct timeout sc_md_tmo; /* master down timeout */
+
+} *carp_softc;
+int carp_number;
+/*
+ * Tunables exposed through carp_sysctl(), indexed by the CARPCTL_* ids.
+ * NOTE(review): which default belongs to which id depends on the CARPCTL_*
+ * values in ip_carp.h, which are not visible here -- confirm.
+ */
+int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0 }; /* XXX for now */
+struct carpstats carpstats;
+
+/*
+ * State hung off a physical interface's if_carp pointer by carp_set_addr():
+ * the list of virtual hosts bound to that interface.
+ */
+struct carp_if {
+ /* virtual hosts (carp_softc) attached to this interface */
+ TAILQ_HEAD(, carp_softc) vhif_vrs;
+ /* number of entries on vhif_vrs; the carp_if is freed when it hits 0 */
+ int vhif_nvrs;
+
+ /* the physical interface itself */
+ struct ifnet *vhif_ifp;
+};
+
+/*
+ * Conditional logging helpers; they only log while carp_opts[CARPCTL_LOG]
+ * is non-zero.  "s" must be a string literal (it is pasted into the format
+ * string) and "a" is its single argument.  CARP_LOG1 prefixes the message
+ * with the carp interface name instead of "carp".
+ *
+ * Each macro is wrapped in do { } while (0) so that it expands to exactly
+ * one statement: a bare "if (...) log(...);" would capture a following
+ * "else" and leave a stray empty statement behind.
+ */
+#define CARP_LOG(s,a) do {						\
+	if (carp_opts[CARPCTL_LOG])					\
+		log(LOG_INFO, "carp: " s "\n", (a));			\
+} while (0)
+#define CARP_LOG1(sc,s,a) do {						\
+	if (carp_opts[CARPCTL_LOG])					\
+		log(LOG_INFO, "%s: " s "\n",				\
+		    (sc)->sc_ac.ac_if.if_xname, (a));			\
+} while (0)
+
+void carp_sha1_generate (struct carp_softc *, u_int32_t *,
+ unsigned char *);
+int carp_sha1_verify (struct carp_softc *, u_int32_t *,
+ unsigned char *);
+void carpattach (int);
+void carpdetach (struct carp_softc *);
+void carp_send_ad (void *);
+void carp_send_arp (struct carp_softc *);
+void carp_master_down (void *);
+int carp_sluggish (struct carp_softc *, struct carp_header *);
+int carp_ioctl (struct ifnet *, u_long, caddr_t);
+void carp_start (struct ifnet *);
+void carp_setrun (struct carp_softc *);
+int carp_set_addr (struct carp_softc *, struct sockaddr_in *);
+int carp_del_addr (struct carp_softc *, struct sockaddr_in *);
+
+/*
+ * Checksum the first "len" bytes of the mbuf chain "m".
+ * Thin wrapper that hands the work to in_cksum().
+ */
+static __inline u_int16_t
+carp_cksum(struct mbuf *m, int len)
+{
+	u_int16_t sum;
+
+	sum = in_cksum(m, len);
+	return (sum);
+}
+
+/*
+ * Compute the SHA1 digest that authenticates an advertisement.
+ *
+ * The digest covers, in this order: the protocol version, the packet type,
+ * sc_vhid, both words of the counter, the shared key and every AF_INET
+ * address configured on the carp interface.
+ *
+ * sc:      virtual host whose vhid, key and addresses are hashed
+ * counter: the two 32-bit counter words exactly as carried in the packet
+ * md:      receives the 20 byte digest
+ */
+void
+carp_sha1_generate(struct carp_softc *sc, u_int32_t counter[2], unsigned char md[20])
+{
+	/* XXX this should probably use the crypto framework */
+	SHA1_CTX sha1ctx;
+	u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
+	struct ifaddr *ifa;
+
+	SHA1Init(&sha1ctx);
+
+	SHA1Update(&sha1ctx, (void *)&version, sizeof(version));
+	SHA1Update(&sha1ctx, (void *)&type, sizeof(type));
+	/*
+	 * NOTE(review): sc_vhid is hashed as a host-order int, so hosts of
+	 * different endianness would presumably compute different digests.
+	 */
+	SHA1Update(&sha1ctx, (void *)&sc->sc_vhid, sizeof(sc->sc_vhid));
+	/*
+	 * "counter" is a pointer here, so sizeof(*counter) would cover only
+	 * the first word and leave the second one unauthenticated; hash
+	 * both words explicitly.
+	 */
+	SHA1Update(&sha1ctx, (void *)counter, 2 * sizeof(counter[0]));
+	SHA1Update(&sha1ctx, (void *)&sc->sc_key, sizeof(sc->sc_key));
+	TAILQ_FOREACH(ifa, &sc->sc_ac.ac_if.if_addrlist, ifa_list) {
+		if (ifa->ifa_addr->sa_family == AF_INET)
+			SHA1Update(&sha1ctx,
+			    (void *)&ifatoia(ifa)->ia_addr.sin_addr.s_addr,
+			    sizeof(u_int32_t));
+	}
+
+	SHA1Final(md, &sha1ctx);
+}
+
+/*
+ * Check a received digest: regenerate the digest for "counter" with
+ * carp_sha1_generate() and compare it with "md".
+ * Returns 0 when the two digests are identical, non-zero otherwise.
+ */
+int
+carp_sha1_verify(struct carp_softc *sc, u_int32_t counter[2], unsigned char md[20])
+{
+	unsigned char expected[20];
+	int differ;
+
+	carp_sha1_generate(sc, counter, expected);
+	differ = bcmp(md, expected, sizeof(expected));
+
+	return (differ);
+}
+
+/*
+ * Process an incoming CARP packet.
+ * We have rearranged the order of the checks compared to the rfc,
+ * but it seems more efficient this way or not possible otherwise.
+ *
+ * m starts at the IP header; the single variadic argument is an int header
+ * length, which is read but not otherwise used.  The packet is dropped when
+ * CARP is disabled, the TTL is not CARP_DFLTTL, it is too short, the
+ * checksum, version or digest is wrong, or no running virtual host with a
+ * matching vhid is bound to the receiving interface.  Accepted
+ * advertisements update the counter and drive the MASTER/BACKUP state
+ * machine.  The mbuf is freed here on every path except a failed pullup,
+ * where m_pullup2() is assumed to have released it already.
+ */
+void
+carp_input(struct mbuf *m, ...)
+{
+	struct ifnet *ifp = m->m_pkthdr.rcvif;
+	struct carp_if *cif;
+	struct carp_softc *sc = NULL;
+	struct ip *ip = mtod(m, struct ip *);
+	struct carp_header *ch;
+	int iplen, len, hlen, badsum;
+	va_list ap;
+	u_int64_t tmp_counter;
+	struct timeval sc_tv, ch_tv;
+
+	va_start(ap, m);
+	hlen = va_arg(ap, int);
+	va_end(ap);
+
+	carpstats.carps_ipackets++;
+
+	if (!carp_opts[CARPCTL_ALLOW]) {
+		m_freem(m);
+		return;
+	}
+
+	/* verify that the IP TTL is 255. */
+	if (ip->ip_ttl != CARP_DFLTTL) {
+		carpstats.carps_badttl++;
+		CARP_LOG("received ttl %d != 255", ip->ip_ttl);
+		m_freem(m);
+		return;
+	}
+
+	iplen = ip->ip_hl << 2;
+	len = iplen + sizeof(*ch);
+
+	/* the packet must hold at least the IP header and a CARP header */
+	if (m->m_pkthdr.len < len) {
+		carpstats.carps_badlen++;
+		CARP_LOG("received len %d < 8",
+		    (int)(m->m_len - sizeof(struct ip)));
+		m_freem(m);
+		return;
+	}
+
+	/* make both headers contiguous before looking at the CARP header */
+	if (m->m_len < len) {
+		if ((m = m_pullup2(m, len)) == NULL) {
+			carpstats.carps_hdrops++;
+			/* CARP_LOG ? */
+			return;
+		}
+		ip = mtod(m, struct ip *);
+	}
+	ch = (struct carp_header *)((caddr_t)ip + iplen);
+
+	/*
+	 * verify the CARP checksum; m_data is moved past the IP header
+	 * for the calculation only and restored before anything else
+	 * looks at the mbuf.
+	 */
+	m->m_data += iplen;
+	badsum = carp_cksum(m, len - iplen);
+	m->m_data -= iplen;
+	if (badsum) {
+		carpstats.carps_badsum++;
+		CARP_LOG("checksum failed", 0);
+		m_freem(m);
+		return;
+	}
+
+	/*
+	 * verify that the VHID is valid on the receiving interface.
+	 * An interface without any virtual host has a NULL if_carp, in
+	 * which case no vhid can match.
+	 */
+	cif = (ifp != NULL) ? (struct carp_if *)ifp->if_carp : NULL;
+	if (cif != NULL) {
+		TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
+			if (sc->sc_vhid == ch->carp_vhid)
+				break;
+	}
+	if (!sc || (sc->sc_ac.ac_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
+	    (IFF_UP|IFF_RUNNING)) {
+		carpstats.carps_badvhid++;
+		m_freem(m);
+		return;
+	}
+
+	sc->sc_ac.ac_if.if_lastchange = time;
+	sc->sc_ac.ac_if.if_ipackets++;
+	sc->sc_ac.ac_if.if_ibytes += m->m_pkthdr.len;
+
+#if NBPFILTER > 0
+	if (sc->sc_ac.ac_if.if_bpf) {
+		/*
+		 * We need to prepend the address family as
+		 * a four byte field. Cons up a dummy header
+		 * to pacify bpf. This is safe because bpf
+		 * will only read from the mbuf (i.e., it won't
+		 * try to free it or keep a pointer to it).
+		 */
+		struct mbuf m0;
+		u_int32_t af = htonl(AF_INET);
+
+		m0.m_next = m;
+		m0.m_len = sizeof(af);
+		m0.m_data = (char *)&af;
+		bpf_mtap(sc->sc_ac.ac_if.if_bpf, &m0);
+	}
+#endif
+
+	/* verify the CARP version. */
+	if (ch->carp_version != CARP_VERSION) {
+		carpstats.carps_badver++;
+		sc->sc_ac.ac_if.if_ierrors++;
+		CARP_LOG1(sc, "invalid version %d", ch->carp_version);
+		m_freem(m);
+		return;
+	}
+
+	/* verify the hash */
+	if (carp_sha1_verify(sc, ch->carp_counter, ch->carp_md)) {
+		carpstats.carps_badauth++;
+		sc->sc_ac.ac_if.if_ierrors++;
+		CARP_LOG("incorrect hash", 0);
+		m_freem(m);
+		return;
+	}
+
+	tmp_counter = ntohl(ch->carp_counter[0]);
+	tmp_counter = tmp_counter<<32;
+	tmp_counter += ntohl(ch->carp_counter[1]);
+
+	/* XXX Replay protection goes here */
+
+	if (sc->sc_init_counter)
+		sc->sc_init_counter = 0;
+	sc->sc_counter = tmp_counter;
+
+	/* express both advertisement intervals as timevals to compare them */
+	sc_tv.tv_sec = sc->sc_advbase;
+	sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+	ch_tv.tv_sec = ch->carp_advbase;
+	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
+
+	switch (sc->sc_state) {
+	case INIT:
+		break;
+	case MASTER:
+		/*
+		 * If we're allowing preemption, and we receive an
+		 * advertisement from a master who's going to be more
+		 * frequent than us, go into BACKUP state.
+		 */
+		if (carp_opts[CARPCTL_PREEMPT] &&
+		    (timercmp(&sc_tv, &ch_tv, >) ||
+		    (timercmp(&sc_tv, &ch_tv, ==) &&
+		    ip->ip_src.s_addr > sc->sc_ia->ia_addr.sin_addr.s_addr))) {
+			timeout_del(&sc->sc_ad_tmo);
+			sc->sc_state = BACKUP;
+			carp_setrun(sc);
+		}
+		break;
+	case BACKUP:
+		/*
+		 * If we're pre-empting masters who advertise slower than us,
+		 * and this one claims to be slower, treat him as down.
+		 */
+		if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) {
+			carp_master_down(sc);
+			break;
+		}
+
+		/*
+		 * If the master is going to advertise at such a low frequency
+		 * that he's guaranteed to time out, we'd might as well just
+		 * treat him as timed out now.
+		 */
+		sc_tv.tv_sec = sc->sc_advbase * 3;
+		if (timercmp(&sc_tv, &ch_tv, <)) {
+			carp_master_down(sc);
+			break;
+		}
+
+		/*
+		 * Otherwise, we reset the counter and wait for the next
+		 * advertisement.
+		 */
+		carp_setrun(sc);
+		break;
+	}
+
+	m_freem(m);
+	return;
+}
+
+/*
+ * sysctl handler for the carp protocol node (wired up in inetsw[]).
+ *
+ * name[0] selects one of the integer tunables in carp_opts[].  Returns
+ * ENOTDIR when more than one name component is supplied, ENOPROTOOPT when
+ * the id is outside 1 .. CARPCTL_MAXID - 1, and otherwise whatever
+ * sysctl_int() returns for that tunable.
+ */
+int
+carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
+    size_t newlen)
+{
+	/* All sysctl names at this level are terminal. */
+	if (namelen != 1)
+		return ENOTDIR;
+
+	/* name[0] is a signed int: reject negative ids as well as 0 */
+	if (name[0] <= 0 || name[0] >= CARPCTL_MAXID)
+		return ENOPROTOOPT;
+
+	return sysctl_int(oldp, oldlenp, newp, newlen, &carp_opts[name[0]]);
+}
+
+
+/*
+ * Interface side of the CARP implementation.
+ */
+
+/*
+ * Allocate and attach "number" carp pseudo-interfaces, carp0 .. carpN-1.
+ * Each softc starts with the default advertisement base, no vhid (-1),
+ * zero skew and both timeouts initialised but not armed.  On allocation
+ * failure a message is printed and nothing is attached.
+ */
+void
+carpattach(int number)
+{
+	extern int ifqmaxlen;
+	struct carp_softc *vr;
+	struct ifnet *cifp;
+	int unit;
+
+	carp_softc = malloc(number * sizeof(*carp_softc), M_DEVBUF, M_NOWAIT);
+	if (carp_softc == NULL) {
+		printf("cannot alloc CARP data\n");
+		return;
+	}
+	bzero(carp_softc, number * sizeof(*carp_softc));
+	carp_number = number;
+
+	for (unit = 0; unit < number; unit++) {
+		vr = carp_softc + unit;
+
+		/* protocol defaults */
+		vr->sc_advbase = CARP_DFLTINTV;
+		vr->sc_vhid = -1;	/* required setting */
+		vr->sc_advskew = 0;
+		vr->sc_init_counter = 1;
+		timeout_set(&vr->sc_ad_tmo, carp_send_ad, vr);
+		timeout_set(&vr->sc_md_tmo, carp_master_down, vr);
+
+		/* the embedded network interface */
+		cifp = &vr->sc_ac.ac_if;
+		cifp->if_softc = vr;
+		snprintf(cifp->if_xname, sizeof(cifp->if_xname), "carp%d",
+		    unit);
+		cifp->if_mtu = ETHERMTU;
+		cifp->if_flags = 0;
+		cifp->if_ioctl = carp_ioctl;
+		cifp->if_output = looutput;
+		cifp->if_start = carp_start;
+		cifp->if_type = IFT_PROPVIRTUAL;
+		cifp->if_snd.ifq_maxlen = ifqmaxlen;
+		cifp->if_hdrlen = 0;
+		if_attach(cifp);
+		if_alloc_sadl(cifp);
+#if NBPFILTER > 0
+		bpfattach(&cifp->if_bpf, cifp, DLT_LOOP, sizeof(u_int32_t));
+#endif
+	}
+}
+
+/*
+ * Tear down one virtual host: stop both timeouts, then remove every
+ * AF_INET address from the carp interface, releasing the CARP state that
+ * carp_set_addr() built for it.  Addresses of other families are left
+ * alone.
+ */
+void
+carpdetach(struct carp_softc *sc)
+{
+	struct ifaddr *ifa, *next;
+
+	timeout_del(&sc->sc_ad_tmo);
+	timeout_del(&sc->sc_md_tmo);
+
+	/*
+	 * Walk the whole list rather than looping on TAILQ_FIRST(): only
+	 * AF_INET entries are removed, so a non-AF_INET entry at the head
+	 * (carpattach() calls if_alloc_sadl() on the interface) would never
+	 * go away and the loop would spin forever.
+	 */
+	for (ifa = TAILQ_FIRST(&sc->sc_ac.ac_if.if_addrlist); ifa != NULL;
+	    ifa = next) {
+		struct in_ifaddr *ia;
+
+		/* fetch the successor first, this entry is unlinked below */
+		next = TAILQ_NEXT(ifa, ifa_list);
+		if (ifa->ifa_addr->sa_family != AF_INET)
+			continue;
+
+		ia = ifatoia(ifa);
+		carp_del_addr(sc, &ia->ia_addr);
+
+		/* ripped screaming from in_control(SIOCDIFADDR) */
+		in_ifscrub(&sc->sc_ac.ac_if, ia);
+		TAILQ_REMOVE(&sc->sc_ac.ac_if.if_addrlist, ifa, ifa_list);
+		TAILQ_REMOVE(&in_ifaddr, ia, ia_list);
+		IFAFREE((&ia->ia_ifa));
+	}
+}
+
+/*
+ * Detach an interface from the carp: tear down every virtual host that is
+ * bound to the physical interface ifp.  Does nothing when no virtual host
+ * is bound to it.
+ */
+void
+carp_ifdetach(struct ifnet *ifp)
+{
+	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
+	struct carp_softc *sc, *next;
+
+	if (cif == NULL)
+		return;
+
+	/*
+	 * carpdetach() can unlink sc from this list and, once the list is
+	 * empty, free cif itself (see carp_del_addr()).  Pick up the
+	 * successor before each call; after the last entry is gone "next"
+	 * is NULL and cif is not touched again.
+	 */
+	for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc != NULL; sc = next) {
+		next = TAILQ_NEXT(sc, sc_list);
+		carpdetach(sc);
+	}
+}
+
+/*
+ * Build and multicast one advertisement for the virtual host "v" (a
+ * struct carp_softc *); also used as the sc_ad_tmo timeout handler.
+ *
+ * While the carp interface is up and running the advertisement carries the
+ * configured advbase/advskew and the timeout is re-armed for the next one.
+ * Once the interface has lost IFF_UP or IFF_RUNNING a single "bow out"
+ * advertisement with advbase = advskew = 255 is sent and the timeout is
+ * NOT re-armed.  Allocation failures count as output errors and are
+ * retried after one advertisement interval.
+ */
+void
+carp_send_ad(void *v)
+{
+	struct carp_softc *sc = v;
+	struct carp_header *ch;
+	struct mbuf *m;
+	struct m_tag *mtag;
+	struct ip *ip;
+	int len, advbase, advskew, error;
+	int bowout = 0;
+
+	/* bow out if we've lost our UPness or RUNNINGuiness */
+	if ((sc->sc_ac.ac_if.if_flags &
+	    (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
+		advbase = 255;
+		advskew = 255;
+		bowout = 1;
+	} else {
+		advbase = sc->sc_advbase;
+		advskew = sc->sc_advskew;
+	}
+
+	carpstats.carps_opackets++;
+
+	/* MGETHDR(m, M_DONTWAIT, MT_HEADER); */
+	m = m_gethdr(M_DONTWAIT, MT_HEADER);
+	if (m == NULL) {
+		sc->sc_ac.ac_if.if_oerrors++;
+		carpstats.carps_onomem++;
+		/* XXX maybe less ? */
+		timeout_add(&sc->sc_ad_tmo, hz * sc->sc_advbase);
+		return;
+	}
+	len = sizeof(*ip) + sizeof(*ch);
+	m->m_pkthdr.len = len;
+	m->m_pkthdr.rcvif = NULL;
+	m->m_len = len;
+	MH_ALIGN(m, m->m_len);
+	m->m_flags |= M_MCAST;
+
+	/* IP header; IP_RAWOUTPUT below means we fill it in ourselves */
+	ip = mtod(m, struct ip *);
+	ip->ip_v = IPVERSION;
+	ip->ip_hl = sizeof(*ip) >> 2;
+	ip->ip_tos = IPTOS_LOWDELAY;
+	ip->ip_len = htons(len);
+	ip->ip_id = htons(ip_randomid());
+	ip->ip_off = htons(IP_DF);
+	ip->ip_ttl = CARP_DFLTTL;
+	ip->ip_p = IPPROTO_CARP;
+	ip->ip_sum = 0;
+	ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
+	ip->ip_dst.s_addr = INADDR_CARP_GROUP;
+
+	/* CARP header directly behind the IP header */
+	ch = (struct carp_header *)(ip + 1);
+	ch->carp_version = CARP_VERSION;
+	ch->carp_type = CARP_ADVERTISEMENT;
+	ch->carp_vhid = sc->sc_vhid;
+	ch->carp_advbase = advbase;
+	ch->carp_advskew = advskew;
+	ch->carp_authlen = 7;	/* XXX DEFINE */
+	ch->carp_pad1 = 0;	/* must be zero */
+	ch->carp_cksum = 0;
+
+	if (sc->sc_init_counter) {
+		/* this could also be seconds since unix epoch */
+		sc->sc_counter = arc4random();
+		sc->sc_counter = sc->sc_counter << 32;
+		sc->sc_counter += arc4random();
+	} else if (sc->sc_counter == 0xffffffffffffffff) {
+		sc->sc_counter = 0;
+	} else
+		sc->sc_counter++;
+
+	ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
+	ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
+
+	carp_sha1_generate(sc, ch->carp_counter, ch->carp_md);
+
+	/* checksum the CARP header only: step over the IP header and back */
+	m->m_data += sizeof(*ip);
+	ch->carp_cksum = carp_cksum(m, len - sizeof(*ip));
+	m->m_data -= sizeof(*ip);
+
+	sc->sc_ac.ac_if.if_lastchange = time;
+	sc->sc_ac.ac_if.if_opackets++;
+	sc->sc_ac.ac_if.if_obytes += len;
+
+	/* Tag packet for carp_output */
+	mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), M_NOWAIT);
+	if (mtag == NULL) {
+		m_freem(m);
+		sc->sc_ac.ac_if.if_oerrors++;
+		/*
+		 * Retry later, exactly like the mbuf shortage above;
+		 * returning without re-arming would stop the
+		 * advertisements for good after one failed allocation.
+		 */
+		timeout_add(&sc->sc_ad_tmo, hz * sc->sc_advbase);
+		return;
+	}
+	bcopy(&sc, (caddr_t)(mtag + 1), sizeof(struct carp_softc *));
+	m_tag_prepend(m, mtag);
+
+	if ((error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)))
+		sc->sc_ac.ac_if.if_oerrors++;
+
+	/*
+	 * Schedule the next advertisement unless we just bowed out; a
+	 * test on advbase alone is always true (it is 255 when bowing
+	 * out) and would keep a downed interface advertising forever.
+	 */
+	if (!bowout)
+		timeout_add(&sc->sc_ad_tmo, hz * sc->sc_advbase);
+}
+
+/*
+ * Announce the virtual router: for every AF_INET address configured on the
+ * carp interface, send a gratuitous ARP request (sender and target both
+ * set to that address) carrying the virtual router MAC address out of the
+ * physical interface.
+ */
+void
+carp_send_arp(struct carp_softc *sc)
+{
+	struct ifaddr *cur;
+	in_addr_t vaddr;
+
+	for (cur = TAILQ_FIRST(&sc->sc_ac.ac_if.if_addrlist); cur != NULL;
+	    cur = TAILQ_NEXT(cur, ifa_list)) {
+		if (cur->ifa_addr->sa_family == AF_INET) {
+			vaddr = ifatoia(cur)->ia_addr.sin_addr.s_addr;
+			arprequest(sc->sc_ifp, &vaddr, &vaddr,
+			    sc->sc_ac.ac_enaddr);
+			DELAY(1000);	/* XXX */
+		}
+	}
+}
+
+/*
+ * Decide whether this host should answer an ARP request for the address
+ * ia, received on an interface whose if_carp is v.
+ *
+ * v:      the struct carp_if of the receiving interface
+ * ia:     the local address being asked for
+ * isaddr: IP address of the requester, used to spread the load
+ * enaddr: set to the MAC address of the virtual host that should answer;
+ *         left untouched when no virtual host is picked
+ *
+ * Returns 1 when the request should be answered and 0 when it should be
+ * dropped.  Without CARPCTL_ARPBALANCE the answer is always 1.
+ */
+int
+carp_iamatch(void *v, struct in_ifaddr *ia,
+    struct in_addr *isaddr, u_int8_t **enaddr)
+{
+	struct carp_if *cif = v;
+	struct carp_softc *vh;
+	int index, count = 0;
+	struct ifaddr *ifa;
+
+	if (carp_opts[CARPCTL_ARPBALANCE]) {
+		/*
+		 * XXX proof of concept implementation.
+		 * We use the source ip to decide which virtual host should
+		 * handle the request. If we're master of that virtual host,
+		 * then we respond, otherwise, just drop the arp packet on
+		 * the floor.
+		 */
+		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+			TAILQ_FOREACH(ifa, &vh->sc_ac.ac_if.if_addrlist,
+			    ifa_list) {
+				/* only AF_INET entries are in_ifaddrs */
+				if (ifa->ifa_addr->sa_family != AF_INET)
+					continue;
+				if (ia->ia_addr.sin_addr.s_addr ==
+				    ifatoia(ifa)->ia_addr.sin_addr.s_addr)
+					count++;
+			}
+		}
+		if (count == 0) {
+			/* should never reach this */
+			return 1;
+		}
+		/* this should be a hash, like pf_hash() */
+		index = isaddr->s_addr % count;
+
+		/* second pass: find the index'th virtual host with the address */
+		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+			TAILQ_FOREACH(ifa, &vh->sc_ac.ac_if.if_addrlist,
+			    ifa_list) {
+				if (ifa->ifa_addr->sa_family != AF_INET)
+					continue;
+				if (ia->ia_addr.sin_addr.s_addr ==
+				    ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
+					if (index == 0 &&
+					    ((vh->sc_ac.ac_if.if_flags &
+					    (IFF_UP|IFF_RUNNING)) ==
+					    (IFF_UP|IFF_RUNNING))) {
+						*enaddr = vh->sc_ac.ac_enaddr;
+						return (1);
+					}
+					index--;
+				}
+			}
+		}
+		return (0);
+	} else {
+		/* answer with the MAC of the running carp interface owning ia */
+		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+			if ((vh->sc_ac.ac_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
+			    (IFF_UP|IFF_RUNNING) && ia->ia_ifp ==
+			    &vh->sc_ac.ac_if) {
+				*enaddr = vh->sc_ac.ac_enaddr;
+			}
+		}
+	}
+
+	return (1);
+}
+
+/*
+ * Map a destination MAC address to a carp interface.
+ *
+ * v is the struct carp_if of the receiving interface and dhost the six
+ * byte destination address.  Returns the ifnet of the virtual host that is
+ * up, running, in MASTER state and owns exactly that address, or NULL when
+ * there is none.
+ */
+struct ifnet *
+carp_forus(void *v, void *dhost)
+{
+	struct carp_if *cif = v;
+	struct carp_softc *vh;
+	u_int8_t *mac = dhost;
+	const int need = IFF_UP|IFF_RUNNING;
+
+	/* only 00:00:5e:00:01:xx can belong to a virtual host */
+	if (mac[0] != 0 || mac[1] != 0 || mac[2] != 0x5e ||
+	    mac[3] != 0 || mac[4] != 1)
+		return (NULL);
+
+	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+		if ((vh->sc_ac.ac_if.if_flags & need) != need)
+			continue;
+		if (vh->sc_state != MASTER)
+			continue;
+		if (bcmp(dhost, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN) == 0)
+			return (&vh->sc_ac.ac_if);
+	}
+
+	return (NULL);
+}
+
+/*
+ * The master is considered gone; also used as the sc_md_tmo timeout
+ * handler.  A BACKUP host advertises itself, announces its addresses via
+ * ARP and becomes MASTER.  A MASTER has nothing to do, and the event is
+ * only reported when it arrives in INIT state.
+ */
+void
+carp_master_down(void *v)
+{
+	struct carp_softc *sc = v;
+
+	if (sc->sc_state == INIT) {
+		printf("%s: master_down event in INIT state\n",
+		    sc->sc_ac.ac_if.if_xname);
+		return;
+	}
+	if (sc->sc_state != BACKUP)
+		return;
+
+	/* take over */
+	carp_send_ad(sc);
+	carp_send_arp(sc);
+	sc->sc_state = MASTER;
+	carp_setrun(sc);
+}
+
+/*
+ * Bring IFF_RUNNING in line with the configuration and (re)start the timer
+ * that belongs to the current state.
+ *
+ * The interface only runs while it is up, has a vhid > 0 and at least one
+ * address; otherwise IFF_RUNNING is cleared and nothing else happens.
+ * From INIT a host with zero skew advertises and becomes MASTER at once,
+ * any other host becomes BACKUP.  BACKUP arms the master-down timeout
+ * (3 * advbase seconds plus advskew/256 s), MASTER arms the advertisement
+ * timeout (advbase seconds plus advskew/256 s).
+ */
+void
+carp_setrun(struct carp_softc *sc)
+{
+	struct ifnet *cifp = &sc->sc_ac.ac_if;
+	struct timeval tv;
+
+	if (!((cifp->if_flags & IFF_UP) && sc->sc_vhid > 0 && sc->sc_naddrs)) {
+		cifp->if_flags &= ~IFF_RUNNING;
+		return;
+	}
+	cifp->if_flags |= IFF_RUNNING;
+
+	if (sc->sc_state == INIT) {
+		if (sc->sc_advskew != 0) {
+			sc->sc_state = BACKUP;
+			carp_setrun(sc);
+		} else {
+			carp_send_ad(sc);
+			carp_send_arp(sc);
+			sc->sc_state = MASTER;
+		}
+	} else if (sc->sc_state == BACKUP) {
+		tv.tv_sec = 3 * sc->sc_advbase;
+		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+		timeout_add(&sc->sc_md_tmo, tvtohz(&tv));
+	} else if (sc->sc_state == MASTER) {
+		tv.tv_sec = sc->sc_advbase;
+		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+		timeout_add(&sc->sc_ad_tmo, tvtohz(&tv));
+	}
+}
+
+/*
+ * Add the address sin to the virtual host sc and bind the host to the
+ * physical interface that serves the address.
+ *
+ * A zero address only re-evaluates the interface state.  Otherwise the
+ * first multicast capable interface (other than the carp interface itself)
+ * with an address on the same subnet is chosen, the CARP multicast group
+ * is joined on it, a carp_if is created for it on first use (which also
+ * puts it into promiscuous mode) and sc is linked onto its list of
+ * virtual hosts.  If sin is that interface's own address the skew is
+ * forced to 0.
+ *
+ * Returns 0 on success, EADDRNOTAVAIL when no suitable interface exists or
+ * sc is already bound to a different one, ENOBUFS when the multicast group
+ * cannot be joined or memory is short, EINVAL when another virtual host on
+ * the interface already uses the vhid, or the error from ifpromisc().
+ */
+int
+carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+{
+	struct ifnet *ifp;
+	struct carp_if *cif;
+	struct in_ifaddr *ia, *ia_if;
+	struct ip_moptions *imo = &sc->sc_imo;
+	struct in_addr addr;
+	int own, error;
+
+	if (sin->sin_addr.s_addr == 0) {
+		if (!(sc->sc_ac.ac_if.if_flags & IFF_UP))
+			sc->sc_state = INIT;
+		if (sc->sc_naddrs)
+			sc->sc_ac.ac_if.if_flags |= IFF_UP;
+		carp_setrun(sc);
+		return 0;
+	}
+
+	/* we have to do it by hands to check we won't match on us */
+	ia_if = NULL; own = 0;
+	for (ia = TAILQ_FIRST(&in_ifaddr); ia; ia = TAILQ_NEXT(ia, ia_list)) {
+
+		/* and, yeah, we need a multicast-capable iface too */
+		if (ia->ia_ifp != &sc->sc_ac.ac_if &&
+		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
+		    (sin->sin_addr.s_addr & ia->ia_subnetmask) ==
+		    ia->ia_subnet) {
+			if (!ia_if)
+				ia_if = ia;
+			if (sin->sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
+				own++;
+		}
+	}
+
+	if (!ia_if)
+		return EADDRNOTAVAIL;
+	ia = ia_if;
+	ifp = ia->ia_ifp;
+
+	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
+	    (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp))
+		return EADDRNOTAVAIL;
+
+	if (imo->imo_num_memberships == 0) {
+		addr.s_addr = INADDR_CARP_GROUP;
+		if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL)
+			return ENOBUFS;
+		imo->imo_num_memberships++;
+		imo->imo_multicast_ifp = ifp;
+		imo->imo_multicast_ttl = CARP_DFLTTL;
+		imo->imo_multicast_loop = 0;
+	}
+
+	if (!ifp->if_carp) {
+
+		MALLOC(cif, struct carp_if *, sizeof(*cif), M_IFADDR, M_WAITOK);
+		if (cif == NULL) {
+			in_delmulti(imo->imo_membership[
+			    --imo->imo_num_memberships]);
+			return ENOBUFS;
+		}
+		if ((error = ifpromisc(ifp, 1)) != 0) {
+			/* don't leak the carp_if we will not be using */
+			FREE(cif, M_IFADDR);
+			in_delmulti(imo->imo_membership[
+			    --imo->imo_num_memberships]);
+			return error;
+		}
+
+		cif->vhif_ifp = ifp;
+		/* the memory is not zeroed and the count is ++'ed below */
+		cif->vhif_nvrs = 0;
+		TAILQ_INIT(&cif->vhif_vrs);
+		ifp->if_carp = (caddr_t)cif;
+
+	} else {
+		struct carp_softc *vr;
+
+		/* a vhid may be used only once per physical interface */
+		cif = (struct carp_if *)ifp->if_carp;
+		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
+			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
+				in_delmulti(imo->imo_membership[
+				    --imo->imo_num_memberships]);
+				return EINVAL;
+			}
+	}
+	sc->sc_ia = ia;
+	sc->sc_ifp = ifp;
+
+	{ /* XXX prevent endless loop if already in queue */
+		struct carp_softc *vr, *after = NULL;
+		int myself = 0;
+		cif = (struct carp_if *)ifp->if_carp;
+
+		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
+			if (vr == sc)
+				myself = 1;
+			if (vr->sc_vhid < sc->sc_vhid)
+				after = vr;
+		}
+
+		if (!myself) {
+			/* We're trying to keep things in order */
+			if (after == NULL) {
+				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
+			} else {
+				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
+			}
+			cif->vhif_nvrs++;
+		}
+	}
+
+	sc->sc_naddrs++;
+	sc->sc_ac.ac_if.if_flags |= IFF_UP;
+	if (own)
+		sc->sc_advskew = 0;
+	sc->sc_state = INIT;
+	carp_setrun(sc);
+
+	return 0;
+}
+
+/*
+ * Account for the removal of one address from the virtual host sc.
+ *
+ * When the last address goes away the host is shut down: both timeouts
+ * are stopped, the interface is marked down, the vhid is reset to -1, the
+ * CARP multicast group is left and sc is unlinked from the physical
+ * interface.  When that was the last virtual host on the physical
+ * interface, promiscuous mode is released and the carp_if is freed.
+ *
+ * sin is not used.  Always returns 0; a call on a host that has no address
+ * or is not bound to an interface is a no-op.
+ */
+int
+carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+{
+	int error = 0;
+
+	/*
+	 * Nothing was ever added (e.g. SIOCDIFADDR on a fresh interface):
+	 * don't let the count go negative and don't touch a NULL sc_ifp.
+	 */
+	if (sc->sc_naddrs <= 0 || sc->sc_ifp == NULL)
+		return error;
+
+	if (!--sc->sc_naddrs) {
+		struct carp_if *cif = (struct carp_if *)sc->sc_ifp->if_carp;
+		struct ip_moptions *imo = &sc->sc_imo;
+
+		timeout_del(&sc->sc_ad_tmo);
+		/* a pending master-down event must not fire on a dead host */
+		timeout_del(&sc->sc_md_tmo);
+		sc->sc_ac.ac_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
+		sc->sc_vhid = -1;
+		in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+		imo->imo_multicast_ifp = NULL;
+		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
+		if (!--cif->vhif_nvrs) {
+			/* undo the ifpromisc(ifp, 1) done by carp_set_addr() */
+			ifpromisc(sc->sc_ifp, 0);
+			sc->sc_ifp->if_carp = NULL;
+			FREE(cif, M_IFADDR);
+		}
+	}
+
+	return error;
+}
+
+/*
+ * ioctl handler of the carp interfaces (installed as if_ioctl by
+ * carpattach()).
+ *
+ * ifp:  the carp interface
+ * cmd:  SIOCSIFADDR, SIOCAIFADDR, SIOCDIFADDR, SIOCSIFFLAGS, SIOCSVH or
+ *       SIOCGVH; anything else yields EINVAL
+ * addr: command argument; interpreted as a struct ifaddr, ifaliasreq or
+ *       ifreq depending on cmd
+ *
+ * Returns 0 on success or an errno value.
+ */
+int
+carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
+{
+ struct proc *p = curproc; /* XXX */
+ struct carp_softc *sc = ifp->if_softc, *vr;
+ struct carpreq carpr;
+ struct ifaddr *ifa;
+ struct ifreq *ifr;
+ struct ifaliasreq *ifra;
+ register int error = 0;
+
+ /* three views of the same argument; which one is valid depends on cmd */
+ ifa = (struct ifaddr *)addr;
+ ifra = (struct ifaliasreq *)addr;
+ ifr = (struct ifreq *)addr;
+
+ switch (cmd) {
+ case SIOCSIFADDR:
+ /* only AF_INET addresses are supported */
+ if (ifa->ifa_addr->sa_family != AF_INET) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ sc->if_flags |= IFF_UP;
+ error = carp_set_addr(sc, satosin(ifa->ifa_addr));
+ break;
+
+ case SIOCAIFADDR:
+ if (ifra->ifra_addr.sa_family != AF_INET) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ sc->if_flags |= IFF_UP;
+ error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
+ break;
+
+ case SIOCDIFADDR:
+ if (ifra->ifra_addr.sa_family != AF_INET) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ sc->if_flags &= ~IFF_UP;
+ error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
+ break;
+
+ case SIOCSIFFLAGS:
+ /*
+ * sc->if_flags remembers the previous UP state, so the two tests
+ * below detect the up->down and down->up transitions.
+ */
+ if (sc->if_flags & IFF_UP && (ifr->ifr_flags & IFF_UP) == 0) {
+ sc->if_flags &= ~IFF_UP;
+ timeout_del(&sc->sc_ad_tmo);
+ timeout_del(&sc->sc_md_tmo);
+ /* a master sends one more advertisement on its way down */
+ if (sc->sc_state == MASTER)
+ carp_send_ad(sc);
+ sc->sc_state = INIT;
+ carp_setrun(sc);
+ }
+ if (ifr->ifr_flags & IFF_UP && (sc->if_flags & IFF_UP) == 0) {
+ sc->if_flags |= IFF_UP;
+ sc->sc_state = INIT;
+ carp_setrun(sc);
+ }
+ break;
+
+ case SIOCSVH:
+ /* superuser only: set vhid, advskew, advbase and key */
+ if ((error = suser(p, p->p_acflag)) != 0)
+ break;
+ if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
+ break;
+ /*
+ * error starts at 1 and is decremented for every parameter that
+ * is accepted; if it is still > 0 at the end nothing was set and
+ * the request fails with EINVAL.
+ */
+ error = 1;
+ if (carpr.carpr_vhid > 0) {
+ if (carpr.carpr_vhid > 255) {
+ error = EINVAL;
+ break;
+ }
+ /* the vhid must be unique on the physical interface */
+ if (sc->sc_ifp) {
+ struct carp_if *cif;
+ cif = (struct carp_if *)sc->sc_ifp->if_carp;
+ TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
+ if (vr != sc &&
+ vr->sc_vhid == carpr.carpr_vhid)
+ return EINVAL;
+ }
+ sc->sc_vhid = carpr.carpr_vhid;
+ /* virtual MAC address 00:00:5e:00:01:<vhid> */
+ sc->sc_ac.ac_enaddr[0] = 0;
+ sc->sc_ac.ac_enaddr[1] = 0;
+ sc->sc_ac.ac_enaddr[2] = 0x5e;
+ sc->sc_ac.ac_enaddr[3] = 0;
+ sc->sc_ac.ac_enaddr[4] = 1;
+ sc->sc_ac.ac_enaddr[5] = sc->sc_vhid;
+ error--;
+ }
+ if (carpr.carpr_advskew > 0) {
+ /*
+ * NOTE(review): the request is also refused while the current
+ * skew is 0, which is the value carpattach() sets, so a non-zero
+ * skew looks impossible to configure -- confirm the intent.
+ */
+ if (carpr.carpr_advskew >= 255 || sc->sc_advskew == 0) {
+ error = EINVAL;
+ break;
+ }
+ sc->sc_advskew = carpr.carpr_advskew;
+ error--;
+ }
+ if (carpr.carpr_advbase > 0) {
+ if (carpr.carpr_advbase > 255) {
+ error = EINVAL;
+ break;
+ }
+ sc->sc_advbase = carpr.carpr_advbase;
+ error--;
+ }
+ /* the key is copied before the check below, even if that fails */
+ bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
+ if (error > 0)
+ error = EINVAL;
+ else {
+ error = 0;
+ carp_setrun(sc);
+ }
+ break;
+
+ case SIOCGVH:
+ /* report the configuration; the key is filled in for the superuser only */
+ bzero(&carpr, sizeof(carpr));
+ carpr.carpr_state = sc->sc_state;
+ carpr.carpr_vhid = sc->sc_vhid;
+ carpr.carpr_advbase = sc->sc_advbase;
+ carpr.carpr_advskew = sc->sc_advskew;
+ if (suser(p, p->p_acflag) == 0)
+ bcopy(sc->sc_key, carpr.carpr_key,
+ sizeof(carpr.carpr_key));
+ error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
+ break;
+
+ default:
+ error = EINVAL;
+ }
+
+ return (error);
+}
+
+
+/*
+ * if_start handler of the carp interfaces.  This function should never be
+ * called; kernels built with DEBUG report it when it happens anyway.
+ */
+void
+carp_start(struct ifnet *ifp)
+{
+#if defined(DEBUG)
+	printf("%s: start called\n", ifp->if_xname);
+#endif /* DEBUG */
+}
+
+/*
+ * Output hook: rewrite the link-level source address of a packet that
+ * carp_send_ad() tagged with PACKET_TAG_CARP to the virtual router MAC
+ * address of the sending virtual host.
+ *
+ * Packets for a family other than AF_INET and packets without the tag are
+ * left alone (return 0).  Returns EOPNOTSUPP for interface types with no
+ * case below, 0 otherwise.  rt is not used.
+ */
+int
+carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
+    struct rtentry *rt)
+{
+	struct carp_softc *vr;
+	struct m_tag *tag;
+
+	if (sa != NULL && sa->sa_family != AF_INET)
+		return (0);
+
+	if ((tag = m_tag_find(m, PACKET_TAG_CARP, NULL)) == NULL)
+		return (0);
+
+	/* the tag payload is the softc pointer stored by carp_send_ad() */
+	bcopy(tag + 1, &vr, sizeof(vr));
+
+	/* Set the source MAC address to Virtual Router MAC Address */
+	switch (ifp->if_type) {
+#if NETHER > 0
+	case IFT_ETHER: {
+		struct ether_header *ehdr = mtod(m, struct ether_header *);
+
+		/* 00:00:5e:00:01:<vhid> */
+		ehdr->ether_shost[0] = 0;
+		ehdr->ether_shost[1] = 0;
+		ehdr->ether_shost[2] = 0x5e;
+		ehdr->ether_shost[3] = 0;
+		ehdr->ether_shost[4] = 1;
+		ehdr->ether_shost[5] = vr->sc_vhid;
+		break;
+	}
+#endif
+#if NFDDI > 0
+	case IFT_FDDI: {
+		struct fddi_header *fhdr = mtod(m, struct fddi_header *);
+
+		/* same address layout as on ethernet */
+		fhdr->fddi_shost[0] = 0;
+		fhdr->fddi_shost[1] = 0;
+		fhdr->fddi_shost[2] = 0x5e;
+		fhdr->fddi_shost[3] = 0;
+		fhdr->fddi_shost[4] = 1;
+		fhdr->fddi_shost[5] = vr->sc_vhid;
+		break;
+	}
+#endif
+#if NTOKEN > 0
+	case IFT_ISO88025: {
+		struct token_header *thdr = mtod(m, struct token_header *);
+
+		/* address derived from the vhid by shifting */
+		thdr->token_shost[0] = 3;
+		thdr->token_shost[1] = 0;
+		thdr->token_shost[2] = 0x40 >> (vr->sc_vhid - 1);
+		thdr->token_shost[3] = 0x40000 >> (vr->sc_vhid - 1);
+		thdr->token_shost[4] = 0;
+		thdr->token_shost[5] = 0;
+		break;
+	}
+#endif
+	default:
+		printf("%s: carp is not supported for this interface type\n",
+		    ifp->if_xname);
+		return (EOPNOTSUPP);
+	}
+
+	return (0);
+}
diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h
new file mode 100644
index 00000000000..53056bba2f2
--- /dev/null
+++ b/sys/netinet/ip_carp.h
@@ -0,0 +1,124 @@
+/* $OpenBSD: ip_carp.h,v 1.1 2003/10/17 21:04:58 mcbride Exp $ */
+
+/*
+ * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
+ * Copyright (c) 2003 Ryan McBride. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+struct carp_header { /* CARP packet header; packed, so no compiler padding */
+#if BYTE_ORDER == LITTLE_ENDIAN
+ u_int8_t carp_type:4, /* CARP_ADVERTISEMENT or CARP_LEAVE_GROUP */
+ carp_version:4; /* CARP_VERSION */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+ u_int8_t carp_version:4, /* same two nibbles, declared in swapped order */
+ carp_type:4;
+#endif
+ u_int8_t carp_vhid; /* virtual host id */
+ u_int8_t carp_advskew; /* advertisement skew */
+ u_int8_t carp_authlen; /* size of counter+md, 32bit chunks */
+ u_int8_t carp_pad1; /* reserved */
+ u_int8_t carp_advbase; /* advertisement interval */
+ u_int16_t carp_cksum; /* checksum (failures counted in carps_badsum) */
+ u_int32_t carp_counter[2]; /* counter part of what carp_authlen sizes */
+ unsigned char carp_md[20]; /* sha1 message digest */
+} __attribute__((__packed__));
+
+#define CARP_DFLTTL 255 /* TTL input is checked against (see carps_badttl) */
+
+/* value for carp_header.carp_version */
+#define CARP_VERSION 2
+
+/* values for carp_header.carp_type */
+#define CARP_ADVERTISEMENT 0x01
+#define CARP_LEAVE_GROUP 0x02
+
+#define CARP_KEY_LEN 20 /* a sha1 hash of a passphrase */
+
+/* carp_advbase: presumably the default advertisement interval - confirm */
+#define CARP_DFLTINTV 1
+
+/*
+ * Statistics: counters for CARP input errors, output and preemptions.
+ */
+struct carpstats {
+ u_long carps_ipackets; /* total input packets */
+ u_long carps_badttl; /* TTL is not CARP_DFLTTL */
+ u_long carps_hdrops; /* packets shorter than header */
+ u_long carps_badsum; /* bad checksum */
+ u_long carps_badver; /* bad (incl unsupp) version */
+ u_long carps_badlen; /* data length does not match */
+ u_long carps_badauth; /* bad authentication */
+ u_long carps_badvhid; /* bad VHID */
+ u_long carps_badaddrs; /* bad address list */
+
+ u_long carps_opackets; /* total output packets */
+ u_long carps_onomem; /* no memory for an mbuf for a send */
+ u_long carps_ostates; /* total state updates sent */
+
+ u_long carps_preempt; /* if enabled, high-pri preemptions */
+};
+
+/*
+ * Configuration structure for the SIOCSVH and SIOCGVH ioctls
+ */
+struct carpreq {
+ int carpr_state; /* presumably an index into CARP_STATES - confirm */
+#define CARP_STATES "INIT", "BACKUP", "MASTER" /* list of state names */
+#define CARP_MAXSTATE 2 /* highest index in CARP_STATES */
+ int carpr_vhid; /* virtual host id */
+ int carpr_advskew; /* advertisement skew */
+ int carpr_advbase; /* advertisement interval */
+ unsigned char carpr_key[CARP_KEY_LEN]; /* sha1 hash of a passphrase */
+};
+#define SIOCSVH _IOWR('i', 245, struct ifreq) /* argument is a struct ifreq */
+#define SIOCGVH _IOWR('i', 246, struct ifreq) /* argument is a struct ifreq */
+
+/*
+ * Names for CARP sysctl objects; CARPCTL_NAMES lists them in id order
+ */
+#define CARPCTL_ALLOW 1 /* accept incoming CARP packets */
+#define CARPCTL_PREEMPT 2 /* high-pri backup preemption mode */
+#define CARPCTL_LOG 3 /* log bad packets */
+#define CARPCTL_ARPBALANCE 4 /* balance arp responses */
+#define CARPCTL_MAXID 5 /* one past the highest id above */
+
+#define CARPCTL_NAMES { \
+ { 0, 0 }, \
+ { "allow", CTLTYPE_INT }, \
+ { "preempt", CTLTYPE_INT }, \
+ { "log", CTLTYPE_INT }, \
+ { "arpbalance", CTLTYPE_INT }, \
+}
+
+#ifdef _KERNEL /* prototypes below are visible to kernel code only */
+void carp_ifdetach (struct ifnet *);
+void carp_input (struct mbuf *, ...);
+int carp_output (struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct rtentry *);
+int carp_iamatch (void *, struct in_ifaddr *, struct in_addr *,
+ u_int8_t **);
+struct ifnet *carp_forus (void *, void *);
+int carp_sysctl (int *, u_int, void *, size_t *, void *, size_t);
+#endif /* _KERNEL */