summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Pieuchot <mpi@cvs.openbsd.org>2015-09-28 08:36:25 +0000
committerMartin Pieuchot <mpi@cvs.openbsd.org>2015-09-28 08:36:25 +0000
commit04d26e2938282ede17d8164a859194648ad77d26 (patch)
treeb1a7c869d3076e4fc06a7863174f77ec4bf0dd16
parentdc4e57708d163e08468627d07925b7ca9e60bf53 (diff)
Factors ou the route hashing code to implement Equal-Cost Multi-Path
for ART. While here sync the two remaining mix() macros. ok chris@, dlg@
-rw-r--r--sys/net/radix_mpath.c116
-rw-r--r--sys/net/radix_mpath.h4
-rw-r--r--sys/net/route.c76
-rw-r--r--sys/net/rtable.c121
-rw-r--r--sys/net/rtable.h4
-rw-r--r--sys/netinet/ip_carp.c27
6 files changed, 180 insertions, 168 deletions
diff --git a/sys/net/radix_mpath.c b/sys/net/radix_mpath.c
index 54b78e4c9bd..e24d9c34b62 100644
--- a/sys/net/radix_mpath.c
+++ b/sys/net/radix_mpath.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: radix_mpath.c,v 1.32 2015/09/01 21:24:04 bluhm Exp $ */
+/* $OpenBSD: radix_mpath.c,v 1.33 2015/09/28 08:36:24 mpi Exp $ */
/* $KAME: radix_mpath.c,v 1.13 2002/10/28 21:05:59 itojun Exp $ */
/*
@@ -52,13 +52,6 @@
#include <netinet6/ip6_var.h>
#endif
-u_int32_t rn_mpath_hash(struct sockaddr *, u_int32_t *);
-
-/*
- * give some jitter to hash, to avoid synchronization between routers
- */
-static u_int32_t hashjitter;
-
int
rn_mpath_capable(struct radix_node_head *rnh)
{
@@ -380,110 +373,3 @@ rn_mpath_reprio(struct radix_node *rn, int newprio)
}
}
}
-
-/* Gateway selection by Hash-Threshold (RFC 2992) */
-struct rtentry *
-rn_mpath_select(struct rtentry *rt, uint32_t *srcaddrp)
-{
- struct radix_node *rn;
- int hash, npaths, threshold;
-
- rn = (struct radix_node *)rt;
- npaths = rn_mpath_active_count(rn);
- hash = rn_mpath_hash(rt_key(rt), srcaddrp) & 0xffff;
- threshold = 1 + (0xffff / npaths);
- while (hash > threshold && rn) {
- /* stay within the multipath routes */
- rn = rn_mpath_next(rn, RMP_MODE_ACTIVE);
- hash -= threshold;
- }
-
- /* if gw selection fails, use the first match (default) */
- if (rn != NULL) {
- rtfree(rt);
- rt = (struct rtentry *)rn;
- rt->rt_refcnt++;
- }
-
- return (rt);
-}
-
-int
-rn_mpath_inithead(void **head, int off)
-{
- struct radix_node_head *rnh;
-
- while (hashjitter == 0)
- hashjitter = arc4random();
- if (rn_inithead(head, off) == 1) {
- rnh = (struct radix_node_head *)*head;
- rnh->rnh_multipath = 1;
- return 1;
- } else
- return 0;
-}
-
-/*
- * hash function based on pf_hash in pf.c
- */
-#define mix(a,b,c) \
- do { \
- a -= b; a -= c; a ^= (c >> 13); \
- b -= c; b -= a; b ^= (a << 8); \
- c -= a; c -= b; c ^= (b >> 13); \
- a -= b; a -= c; a ^= (c >> 12); \
- b -= c; b -= a; b ^= (a << 16); \
- c -= a; c -= b; c ^= (b >> 5); \
- a -= b; a -= c; a ^= (c >> 3); \
- b -= c; b -= a; b ^= (a << 10); \
- c -= a; c -= b; c ^= (b >> 15); \
- } while (0)
-
-u_int32_t
-rn_mpath_hash(struct sockaddr *dst, u_int32_t *srcaddrp)
-{
- u_int32_t a, b, c;
-
- a = b = 0x9e3779b9;
- c = hashjitter;
-
- switch (dst->sa_family) {
- case AF_INET:
- {
- struct sockaddr_in *sin_dst;
-
- sin_dst = satosin(dst);
- a += sin_dst->sin_addr.s_addr;
- b += srcaddrp ? srcaddrp[0] : 0;
- mix(a, b, c);
- break;
- }
-#ifdef INET6
- case AF_INET6:
- {
- struct sockaddr_in6 *sin6_dst;
-
- sin6_dst = satosin6(dst);
- a += sin6_dst->sin6_addr.s6_addr32[0];
- b += sin6_dst->sin6_addr.s6_addr32[2];
- c += srcaddrp ? srcaddrp[0] : 0;
- mix(a, b, c);
- a += sin6_dst->sin6_addr.s6_addr32[1];
- b += sin6_dst->sin6_addr.s6_addr32[3];
- c += srcaddrp ? srcaddrp[1] : 0;
- mix(a, b, c);
- a += sin6_dst->sin6_addr.s6_addr32[2];
- b += sin6_dst->sin6_addr.s6_addr32[1];
- c += srcaddrp ? srcaddrp[2] : 0;
- mix(a, b, c);
- a += sin6_dst->sin6_addr.s6_addr32[3];
- b += sin6_dst->sin6_addr.s6_addr32[0];
- c += srcaddrp ? srcaddrp[3] : 0;
- mix(a, b, c);
- break;
- }
-#endif /* INET6 */
- }
-
- return c;
-}
diff --git a/sys/net/radix_mpath.h b/sys/net/radix_mpath.h
index a378a0d7020..5466bbb4f40 100644
--- a/sys/net/radix_mpath.h
+++ b/sys/net/radix_mpath.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: radix_mpath.h,v 1.16 2015/07/18 15:51:16 mpi Exp $ */
+/* $OpenBSD: radix_mpath.h,v 1.17 2015/09/28 08:36:24 mpi Exp $ */
/* $KAME: radix_mpath.h,v 1.9 2004/03/30 11:21:49 keiichi Exp $ */
/*
@@ -54,10 +54,8 @@ void rn_mpath_adj_mpflag(struct radix_node *, u_int8_t);
int rn_mpath_active_count(struct radix_node *);
struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *,
u_int8_t);
-struct rtentry *rn_mpath_select(struct rtentry *, uint32_t *);
int rt_mpath_conflict(struct radix_node_head *, struct sockaddr *,
struct sockaddr *, struct sockaddr *, u_int8_t, int);
-int rn_mpath_inithead(void **, int);
#endif /* _KERNEL */
#endif /* _NET_RADIX_MPATH_H_ */
diff --git a/sys/net/route.c b/sys/net/route.c
index ea879e4527f..b479f0e070f 100644
--- a/sys/net/route.c
+++ b/sys/net/route.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: route.c,v 1.243 2015/09/25 09:51:20 mpi Exp $ */
+/* $OpenBSD: route.c,v 1.244 2015/09/28 08:36:24 mpi Exp $ */
/* $NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $ */
/*
@@ -136,6 +136,9 @@
#include <net/if_enc.h>
#endif
+/* Give some jitter to hash, to avoid synchronization between routers. */
+static uint32_t rt_hashjitter;
+
struct rtstat rtstat;
void ***rt_tables;
u_int8_t af2rtafidx[AF_MAX+1];
@@ -227,6 +230,9 @@ route_init(void)
af2rtafidx[dom->dom_family] = rtafidx_max++;
}
+ while (rt_hashjitter == 0)
+ rt_hashjitter = arc4random();
+
if (rtable_add(0) != 0)
panic("route_init rtable_add");
}
@@ -370,6 +376,72 @@ miss:
}
#ifndef SMALL_KERNEL
+
+/*
+ * Originated from bridge_hash() in if_bridge.c
+ */
+#define mix(a, b, c) do { \
+ a -= b; a -= c; a ^= (c >> 13); \
+ b -= c; b -= a; b ^= (a << 8); \
+ c -= a; c -= b; c ^= (b >> 13); \
+ a -= b; a -= c; a ^= (c >> 12); \
+ b -= c; b -= a; b ^= (a << 16); \
+ c -= a; c -= b; c ^= (b >> 5); \
+ a -= b; a -= c; a ^= (c >> 3); \
+ b -= c; b -= a; b ^= (a << 10); \
+ c -= a; c -= b; c ^= (b >> 15); \
+} while (0)
+
+static uint32_t
+rt_hash(struct rtentry *rt, uint32_t *src)
+{
+ struct sockaddr *dst = rt_key(rt);
+ uint32_t a, b, c;
+
+ a = b = 0x9e3779b9;
+ c = rt_hashjitter;
+
+ switch (dst->sa_family) {
+ case AF_INET:
+ {
+ struct sockaddr_in *sin;
+
+ sin = satosin(dst);
+ a += sin->sin_addr.s_addr;
+ b += (src != NULL) ? src[0] : 0;
+ mix(a, b, c);
+ break;
+ }
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = satosin6(dst);
+ a += sin6->sin6_addr.s6_addr32[0];
+ b += sin6->sin6_addr.s6_addr32[2];
+ c += (src != NULL) ? src[0] : 0;
+ mix(a, b, c);
+ a += sin6->sin6_addr.s6_addr32[1];
+ b += sin6->sin6_addr.s6_addr32[3];
+ c += (src != NULL) ? src[1] : 0;
+ mix(a, b, c);
+ a += sin6->sin6_addr.s6_addr32[2];
+ b += sin6->sin6_addr.s6_addr32[1];
+ c += (src != NULL) ? src[2] : 0;
+ mix(a, b, c);
+ a += sin6->sin6_addr.s6_addr32[3];
+ b += sin6->sin6_addr.s6_addr32[0];
+ c += (src != NULL) ? src[3] : 0;
+ mix(a, b, c);
+ break;
+ }
+#endif /* INET6 */
+ }
+
+ return (c);
+}
+
/*
* Allocate a route, potentially using multipath to select the peer.
*/
@@ -393,7 +465,7 @@ rtalloc_mpath(struct sockaddr *dst, uint32_t *src, unsigned int rtableid)
))
return (rt);
- return (rtable_mpath_select(rt, src));
+ return (rtable_mpath_select(rt, rt_hash(rt, src) & 0xffff));
}
#endif /* SMALL_KERNEL */
diff --git a/sys/net/rtable.c b/sys/net/rtable.c
index 2cd58309cb0..d1b18eebd9d 100644
--- a/sys/net/rtable.c
+++ b/sys/net/rtable.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: rtable.c,v 1.6 2015/09/12 09:22:29 mpi Exp $ */
+/* $OpenBSD: rtable.c,v 1.7 2015/09/28 08:36:24 mpi Exp $ */
/*
* Copyright (c) 2014-2015 Martin Pieuchot
@@ -39,11 +39,14 @@ rtable_attach(void **head, int off)
{
int rv;
-#ifndef SMALL_KERNEL
- rv = rn_mpath_inithead(head, off);
-#else
rv = rn_inithead(head, off);
-#endif
+
+#ifndef SMALL_KERNEL
+ if (rv == 1) {
+ struct radix_node_head *rnh = (struct radix_node_head *)*head;
+ rnh->rnh_multipath = 1;
+ }
+#endif /* SMALL_KERNEL */
return (rv);
}
@@ -206,10 +209,34 @@ rtable_mpath_conflict(unsigned int rtableid, struct sockaddr *dst,
return (rt_mpath_conflict(rnh, dst, mask, gateway, prio, mpathok));
}
+/* Gateway selection by Hash-Threshold (RFC 2992) */
struct rtentry *
-rtable_mpath_select(struct rtentry *rt, uint32_t *src)
+rtable_mpath_select(struct rtentry *rt, uint32_t hash)
{
- return (rn_mpath_select(rt, src));
+ struct rtentry *mrt = rt;
+ int npaths, threshold;
+
+ npaths = 1;
+ while ((mrt = rt_mpath_next(mrt)) != NULL)
+ npaths++;
+
+ threshold = (0xffff / npaths) + 1;
+
+ mrt = rt;
+ while (hash > threshold && mrt != NULL) {
+ /* stay within the multipath routes */
+ mrt = rt_mpath_next(mrt);
+ hash -= threshold;
+ }
+
+ /* if gw selection fails, use the first match (default) */
+ if (mrt != NULL) {
+ rtref(mrt);
+ rtfree(rt);
+ rt = mrt;
+ }
+
+ return (rt);
}
void
@@ -391,27 +418,13 @@ rtable_insert(unsigned int rtableid, struct sockaddr *dst,
rt->rt_mask = msk;
}
-#ifndef SMALL_KERNEL
- if ((mrt = LIST_FIRST(&an->an_rtlist)) != NULL) {
- /*
- * Select the order of the MPATH routes.
- */
- while (LIST_NEXT(mrt, rt_next) != NULL) {
- if (mrt->rt_priority > prio)
- break;
- mrt = LIST_NEXT(mrt, rt_next);
- }
-
- if (mrt->rt_priority > prio)
- LIST_INSERT_BEFORE(mrt, rt, rt_next);
- else
- LIST_INSERT_AFTER(mrt, rt, rt_next);
+ LIST_INSERT_HEAD(&an->an_rtlist, rt, rt_next);
- return (0);
- }
+#ifndef SMALL_KERNEL
+ /* Put newly inserted entry at the right place. */
+ rtable_mpath_reprio(rt, rt->rt_priority);
#endif /* SMALL_KERNEL */
- LIST_INSERT_HEAD(&an->an_rtlist, rt, rt_next);
return (0);
}
@@ -602,21 +615,65 @@ rtable_mpath_conflict(unsigned int rtableid, struct sockaddr *dst,
return (0);
}
+/* Gateway selection by Hash-Threshold (RFC 2992) */
struct rtentry *
-rtable_mpath_select(struct rtentry *rt, uint32_t *src)
+rtable_mpath_select(struct rtentry *rt, uint32_t hash)
{
struct art_node *an = rt->rt_node;
+ struct rtentry *mrt;
+ int npaths, threshold;
- /*
- * XXX consider using ``src'' (8
- */
- return (LIST_FIRST(&an->an_rtlist));
+ npaths = 0;
+ LIST_FOREACH(mrt, &an->an_rtlist, rt_next) {
+ /* Only count nexthops with the same priority. */
+ if (mrt->rt_priority == rt->rt_priority)
+ npaths++;
+ }
+
+ threshold = (0xffff / npaths) + 1;
+
+ mrt = LIST_FIRST(&an->an_rtlist);
+ while (hash > threshold && mrt != NULL) {
+ if (mrt->rt_priority == rt->rt_priority)
+ hash -= threshold;
+ mrt = LIST_NEXT(mrt, rt_next);
+ }
+
+ if (mrt != NULL) {
+ rtref(mrt);
+ rtfree(rt);
+ rt = mrt;
+ }
+
+ return (rt);
}
void
-rtable_mpath_reprio(struct rtentry *rt, uint8_t newprio)
+rtable_mpath_reprio(struct rtentry *rt, uint8_t prio)
{
- /* XXX */
+ struct art_node *an = rt->rt_node;
+ struct rtentry *mrt;
+
+ LIST_REMOVE(rt, rt_next);
+ rt->rt_priority = prio;
+
+ if ((mrt = LIST_FIRST(&an->an_rtlist)) != NULL) {
+ /*
+ * Select the order of the MPATH routes.
+ */
+ while (LIST_NEXT(mrt, rt_next) != NULL) {
+ if (mrt->rt_priority > prio)
+ break;
+ mrt = LIST_NEXT(mrt, rt_next);
+ }
+
+ if (mrt->rt_priority > prio)
+ LIST_INSERT_BEFORE(mrt, rt, rt_next);
+ else
+ LIST_INSERT_AFTER(mrt, rt, rt_next);
+ } else {
+ LIST_INSERT_HEAD(&an->an_rtlist, rt, rt_next);
+ }
}
#endif /* SMALL_KERNEL */
diff --git a/sys/net/rtable.h b/sys/net/rtable.h
index 19b559c9055..f1f40eb86f8 100644
--- a/sys/net/rtable.h
+++ b/sys/net/rtable.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: rtable.h,v 1.3 2015/09/04 08:43:39 mpi Exp $ */
+/* $OpenBSD: rtable.h,v 1.4 2015/09/28 08:36:24 mpi Exp $ */
/*
* Copyright (c) 2014-2015 Martin Pieuchot
@@ -69,6 +69,6 @@ struct rtentry *rtable_mpath_match(unsigned int, struct rtentry *,
struct sockaddr *, uint8_t);
int rtable_mpath_conflict(unsigned int, struct sockaddr *,
struct sockaddr *, struct sockaddr *, uint8_t, int);
-struct rtentry *rtable_mpath_select(struct rtentry *, uint32_t *);
+struct rtentry *rtable_mpath_select(struct rtentry *, uint32_t);
void rtable_mpath_reprio(struct rtentry *, uint8_t);
#endif /* _NET_RTABLE_H_ */
diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c
index 86d6d8e8c11..e62a6497db8 100644
--- a/sys/netinet/ip_carp.c
+++ b/sys/netinet/ip_carp.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ip_carp.c,v 1.272 2015/09/27 04:27:57 dlg Exp $ */
+/* $OpenBSD: ip_carp.c,v 1.273 2015/09/28 08:36:24 mpi Exp $ */
/*
* Copyright (c) 2002 Michael Shalayeff. All rights reserved.
@@ -1298,20 +1298,19 @@ carp_send_na(struct carp_softc *sc)
#endif /* INET6 */
/*
- * Based on bridge_hash() in if_bridge.c
+ * Originated from bridge_hash() in if_bridge.c
*/
-#define mix(a,b,c) \
- do { \
- a -= b; a -= c; a ^= (c >> 13); \
- b -= c; b -= a; b ^= (a << 8); \
- c -= a; c -= b; c ^= (b >> 13); \
- a -= b; a -= c; a ^= (c >> 12); \
- b -= c; b -= a; b ^= (a << 16); \
- c -= a; c -= b; c ^= (b >> 5); \
- a -= b; a -= c; a ^= (c >> 3); \
- b -= c; b -= a; b ^= (a << 10); \
- c -= a; c -= b; c ^= (b >> 15); \
- } while (0)
+#define mix(a, b, c) do { \
+ a -= b; a -= c; a ^= (c >> 13); \
+ b -= c; b -= a; b ^= (a << 8); \
+ c -= a; c -= b; c ^= (b >> 13); \
+ a -= b; a -= c; a ^= (c >> 12); \
+ b -= c; b -= a; b ^= (a << 16); \
+ c -= a; c -= b; c ^= (b >> 5); \
+ a -= b; a -= c; a ^= (c >> 3); \
+ b -= c; b -= a; b ^= (a << 10); \
+ c -= a; c -= b; c ^= (b >> 15); \
+} while (0)
u_int32_t
carp_hash(struct carp_softc *sc, u_char *src)