diff options
author | Martin Pieuchot <mpi@cvs.openbsd.org> | 2015-09-28 08:36:25 +0000 |
---|---|---|
committer | Martin Pieuchot <mpi@cvs.openbsd.org> | 2015-09-28 08:36:25 +0000 |
commit | 04d26e2938282ede17d8164a859194648ad77d26 (patch) | |
tree | b1a7c869d3076e4fc06a7863174f77ec4bf0dd16 | |
parent | dc4e57708d163e08468627d07925b7ca9e60bf53 (diff) |
Factors ou the route hashing code to implement Equal-Cost Multi-Path
for ART.
While here sync the two remaining mix() macros.
ok chris@, dlg@
-rw-r--r-- | sys/net/radix_mpath.c | 116 | ||||
-rw-r--r-- | sys/net/radix_mpath.h | 4 | ||||
-rw-r--r-- | sys/net/route.c | 76 | ||||
-rw-r--r-- | sys/net/rtable.c | 121 | ||||
-rw-r--r-- | sys/net/rtable.h | 4 | ||||
-rw-r--r-- | sys/netinet/ip_carp.c | 27 |
6 files changed, 180 insertions, 168 deletions
diff --git a/sys/net/radix_mpath.c b/sys/net/radix_mpath.c index 54b78e4c9bd..e24d9c34b62 100644 --- a/sys/net/radix_mpath.c +++ b/sys/net/radix_mpath.c @@ -1,4 +1,4 @@ -/* $OpenBSD: radix_mpath.c,v 1.32 2015/09/01 21:24:04 bluhm Exp $ */ +/* $OpenBSD: radix_mpath.c,v 1.33 2015/09/28 08:36:24 mpi Exp $ */ /* $KAME: radix_mpath.c,v 1.13 2002/10/28 21:05:59 itojun Exp $ */ /* @@ -52,13 +52,6 @@ #include <netinet6/ip6_var.h> #endif -u_int32_t rn_mpath_hash(struct sockaddr *, u_int32_t *); - -/* - * give some jitter to hash, to avoid synchronization between routers - */ -static u_int32_t hashjitter; - int rn_mpath_capable(struct radix_node_head *rnh) { @@ -380,110 +373,3 @@ rn_mpath_reprio(struct radix_node *rn, int newprio) } } } - -/* Gateway selection by Hash-Threshold (RFC 2992) */ -struct rtentry * -rn_mpath_select(struct rtentry *rt, uint32_t *srcaddrp) -{ - struct radix_node *rn; - int hash, npaths, threshold; - - rn = (struct radix_node *)rt; - npaths = rn_mpath_active_count(rn); - hash = rn_mpath_hash(rt_key(rt), srcaddrp) & 0xffff; - threshold = 1 + (0xffff / npaths); - while (hash > threshold && rn) { - /* stay within the multipath routes */ - rn = rn_mpath_next(rn, RMP_MODE_ACTIVE); - hash -= threshold; - } - - /* if gw selection fails, use the first match (default) */ - if (rn != NULL) { - rtfree(rt); - rt = (struct rtentry *)rn; - rt->rt_refcnt++; - } - - return (rt); -} - -int -rn_mpath_inithead(void **head, int off) -{ - struct radix_node_head *rnh; - - while (hashjitter == 0) - hashjitter = arc4random(); - if (rn_inithead(head, off) == 1) { - rnh = (struct radix_node_head *)*head; - rnh->rnh_multipath = 1; - return 1; - } else - return 0; -} - -/* - * hash function based on pf_hash in pf.c - */ -#define mix(a,b,c) \ - do { \ - a -= b; a -= c; a ^= (c >> 13); \ - b -= c; b -= a; b ^= (a << 8); \ - c -= a; c -= b; c ^= (b >> 13); \ - a -= b; a -= c; a ^= (c >> 12); \ - b -= c; b -= a; b ^= (a << 16); \ - c -= a; c -= b; c ^= (b >> 5); \ - a -= b; a -= c; a ^= (c >> 3); \ - b -= c; b -= a; b ^= (a << 10); \ - c -= a; c -= b; c ^= (b >> 15); \ - } while (0) - -u_int32_t -rn_mpath_hash(struct sockaddr *dst, u_int32_t *srcaddrp) -{ - u_int32_t a, b, c; - - a = b = 0x9e3779b9; - c = hashjitter; - - switch (dst->sa_family) { - case AF_INET: - { - struct sockaddr_in *sin_dst; - - sin_dst = satosin(dst); - a += sin_dst->sin_addr.s_addr; - b += srcaddrp ? srcaddrp[0] : 0; - mix(a, b, c); - break; - } -#ifdef INET6 - case AF_INET6: - { - struct sockaddr_in6 *sin6_dst; - - sin6_dst = satosin6(dst); - a += sin6_dst->sin6_addr.s6_addr32[0]; - b += sin6_dst->sin6_addr.s6_addr32[2]; - c += srcaddrp ? srcaddrp[0] : 0; - mix(a, b, c); - a += sin6_dst->sin6_addr.s6_addr32[1]; - b += sin6_dst->sin6_addr.s6_addr32[3]; - c += srcaddrp ? srcaddrp[1] : 0; - mix(a, b, c); - a += sin6_dst->sin6_addr.s6_addr32[2]; - b += sin6_dst->sin6_addr.s6_addr32[1]; - c += srcaddrp ? srcaddrp[2] : 0; - mix(a, b, c); - a += sin6_dst->sin6_addr.s6_addr32[3]; - b += sin6_dst->sin6_addr.s6_addr32[0]; - c += srcaddrp ? srcaddrp[3] : 0; - mix(a, b, c); - break; - } -#endif /* INET6 */ - } - - return c; -} diff --git a/sys/net/radix_mpath.h b/sys/net/radix_mpath.h index a378a0d7020..5466bbb4f40 100644 --- a/sys/net/radix_mpath.h +++ b/sys/net/radix_mpath.h @@ -1,4 +1,4 @@ -/* $OpenBSD: radix_mpath.h,v 1.16 2015/07/18 15:51:16 mpi Exp $ */ +/* $OpenBSD: radix_mpath.h,v 1.17 2015/09/28 08:36:24 mpi Exp $ */ /* $KAME: radix_mpath.h,v 1.9 2004/03/30 11:21:49 keiichi Exp $ */ /* @@ -54,10 +54,8 @@ void rn_mpath_adj_mpflag(struct radix_node *, u_int8_t); int rn_mpath_active_count(struct radix_node *); struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *, u_int8_t); -struct rtentry *rn_mpath_select(struct rtentry *, uint32_t *); int rt_mpath_conflict(struct radix_node_head *, struct sockaddr *, struct sockaddr *, struct sockaddr *, u_int8_t, int); -int rn_mpath_inithead(void **, int); #endif /* _KERNEL */ #endif /* _NET_RADIX_MPATH_H_ */ diff --git a/sys/net/route.c b/sys/net/route.c index ea879e4527f..b479f0e070f 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -1,4 +1,4 @@ -/* $OpenBSD: route.c,v 1.243 2015/09/25 09:51:20 mpi Exp $ */ +/* $OpenBSD: route.c,v 1.244 2015/09/28 08:36:24 mpi Exp $ */ /* $NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $ */ /* @@ -136,6 +136,9 @@ #include <net/if_enc.h> #endif +/* Give some jitter to hash, to avoid synchronization between routers. */ +static uint32_t rt_hashjitter; + struct rtstat rtstat; void ***rt_tables; u_int8_t af2rtafidx[AF_MAX+1]; @@ -227,6 +230,9 @@ route_init(void) af2rtafidx[dom->dom_family] = rtafidx_max++; } + while (rt_hashjitter == 0) + rt_hashjitter = arc4random(); + if (rtable_add(0) != 0) panic("route_init rtable_add"); } @@ -370,6 +376,72 @@ miss: } #ifndef SMALL_KERNEL + +/* + * Originated from bridge_hash() in if_bridge.c + */ +#define mix(a, b, c) do { \ + a -= b; a -= c; a ^= (c >> 13); \ + b -= c; b -= a; b ^= (a << 8); \ + c -= a; c -= b; c ^= (b >> 13); \ + a -= b; a -= c; a ^= (c >> 12); \ + b -= c; b -= a; b ^= (a << 16); \ + c -= a; c -= b; c ^= (b >> 5); \ + a -= b; a -= c; a ^= (c >> 3); \ + b -= c; b -= a; b ^= (a << 10); \ + c -= a; c -= b; c ^= (b >> 15); \ +} while (0) + +static uint32_t +rt_hash(struct rtentry *rt, uint32_t *src) +{ + struct sockaddr *dst = rt_key(rt); + uint32_t a, b, c; + + a = b = 0x9e3779b9; + c = rt_hashjitter; + + switch (dst->sa_family) { + case AF_INET: + { + struct sockaddr_in *sin; + + sin = satosin(dst); + a += sin->sin_addr.s_addr; + b += (src != NULL) ? src[0] : 0; + mix(a, b, c); + break; + } +#ifdef INET6 + case AF_INET6: + { + struct sockaddr_in6 *sin6; + + sin6 = satosin6(dst); + a += sin6->sin6_addr.s6_addr32[0]; + b += sin6->sin6_addr.s6_addr32[2]; + c += (src != NULL) ? src[0] : 0; + mix(a, b, c); + a += sin6->sin6_addr.s6_addr32[1]; + b += sin6->sin6_addr.s6_addr32[3]; + c += (src != NULL) ? src[1] : 0; + mix(a, b, c); + a += sin6->sin6_addr.s6_addr32[2]; + b += sin6->sin6_addr.s6_addr32[1]; + c += (src != NULL) ? src[2] : 0; + mix(a, b, c); + a += sin6->sin6_addr.s6_addr32[3]; + b += sin6->sin6_addr.s6_addr32[0]; + c += (src != NULL) ? src[3] : 0; + mix(a, b, c); + break; + } +#endif /* INET6 */ + } + + return (c); +} + /* * Allocate a route, potentially using multipath to select the peer. */ @@ -393,7 +465,7 @@ rtalloc_mpath(struct sockaddr *dst, uint32_t *src, unsigned int rtableid) )) return (rt); - return (rtable_mpath_select(rt, src)); + return (rtable_mpath_select(rt, rt_hash(rt, src) & 0xffff)); } #endif /* SMALL_KERNEL */ diff --git a/sys/net/rtable.c b/sys/net/rtable.c index 2cd58309cb0..d1b18eebd9d 100644 --- a/sys/net/rtable.c +++ b/sys/net/rtable.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rtable.c,v 1.6 2015/09/12 09:22:29 mpi Exp $ */ +/* $OpenBSD: rtable.c,v 1.7 2015/09/28 08:36:24 mpi Exp $ */ /* * Copyright (c) 2014-2015 Martin Pieuchot @@ -39,11 +39,14 @@ rtable_attach(void **head, int off) { int rv; -#ifndef SMALL_KERNEL - rv = rn_mpath_inithead(head, off); -#else rv = rn_inithead(head, off); -#endif + +#ifndef SMALL_KERNEL + if (rv == 1) { + struct radix_node_head *rnh = (struct radix_node_head *)*head; + rnh->rnh_multipath = 1; + } +#endif /* SMALL_KERNEL */ return (rv); } @@ -206,10 +209,34 @@ rtable_mpath_conflict(unsigned int rtableid, struct sockaddr *dst, return (rt_mpath_conflict(rnh, dst, mask, gateway, prio, mpathok)); } +/* Gateway selection by Hash-Threshold (RFC 2992) */ struct rtentry * -rtable_mpath_select(struct rtentry *rt, uint32_t *src) +rtable_mpath_select(struct rtentry *rt, uint32_t hash) { - return (rn_mpath_select(rt, src)); + struct rtentry *mrt = rt; + int npaths, threshold; + + npaths = 1; + while ((mrt = rt_mpath_next(mrt)) != NULL) + npaths++; + + threshold = (0xffff / npaths) + 1; + + mrt = rt; + while (hash > threshold && mrt != NULL) { + /* stay within the multipath routes */ + mrt = rt_mpath_next(mrt); + hash -= threshold; + } + + /* if gw selection fails, use the first match (default) */ + if (mrt != NULL) { + rtref(mrt); + rtfree(rt); + rt = mrt; + } + + return (rt); } void @@ -391,27 +418,13 @@ rtable_insert(unsigned int rtableid, struct sockaddr *dst, rt->rt_mask = msk; } -#ifndef SMALL_KERNEL - if ((mrt = LIST_FIRST(&an->an_rtlist)) != NULL) { - /* - * Select the order of the MPATH routes. - */ - while (LIST_NEXT(mrt, rt_next) != NULL) { - if (mrt->rt_priority > prio) - break; - mrt = LIST_NEXT(mrt, rt_next); - } - - if (mrt->rt_priority > prio) - LIST_INSERT_BEFORE(mrt, rt, rt_next); - else - LIST_INSERT_AFTER(mrt, rt, rt_next); + LIST_INSERT_HEAD(&an->an_rtlist, rt, rt_next); - return (0); - } +#ifndef SMALL_KERNEL + /* Put newly inserted entry at the right place. */ + rtable_mpath_reprio(rt, rt->rt_priority); #endif /* SMALL_KERNEL */ - LIST_INSERT_HEAD(&an->an_rtlist, rt, rt_next); return (0); } @@ -602,21 +615,65 @@ rtable_mpath_conflict(unsigned int rtableid, struct sockaddr *dst, return (0); } +/* Gateway selection by Hash-Threshold (RFC 2992) */ struct rtentry * -rtable_mpath_select(struct rtentry *rt, uint32_t *src) +rtable_mpath_select(struct rtentry *rt, uint32_t hash) { struct art_node *an = rt->rt_node; + struct rtentry *mrt; + int npaths, threshold; - /* - * XXX consider using ``src'' (8 - */ - return (LIST_FIRST(&an->an_rtlist)); + npaths = 0; + LIST_FOREACH(mrt, &an->an_rtlist, rt_next) { + /* Only count nexthops with the same priority. */ + if (mrt->rt_priority == rt->rt_priority) + npaths++; + } + + threshold = (0xffff / npaths) + 1; + + mrt = LIST_FIRST(&an->an_rtlist); + while (hash > threshold && mrt != NULL) { + if (mrt->rt_priority == rt->rt_priority) + hash -= threshold; + mrt = LIST_NEXT(mrt, rt_next); + } + + if (mrt != NULL) { + rtref(mrt); + rtfree(rt); + rt = mrt; + } + + return (rt); } void -rtable_mpath_reprio(struct rtentry *rt, uint8_t newprio) +rtable_mpath_reprio(struct rtentry *rt, uint8_t prio) { - /* XXX */ + struct art_node *an = rt->rt_node; + struct rtentry *mrt; + + LIST_REMOVE(rt, rt_next); + rt->rt_priority = prio; + + if ((mrt = LIST_FIRST(&an->an_rtlist)) != NULL) { + /* + * Select the order of the MPATH routes. + */ + while (LIST_NEXT(mrt, rt_next) != NULL) { + if (mrt->rt_priority > prio) + break; + mrt = LIST_NEXT(mrt, rt_next); + } + + if (mrt->rt_priority > prio) + LIST_INSERT_BEFORE(mrt, rt, rt_next); + else + LIST_INSERT_AFTER(mrt, rt, rt_next); + } else { + LIST_INSERT_HEAD(&an->an_rtlist, rt, rt_next); + } } #endif /* SMALL_KERNEL */ diff --git a/sys/net/rtable.h b/sys/net/rtable.h index 19b559c9055..f1f40eb86f8 100644 --- a/sys/net/rtable.h +++ b/sys/net/rtable.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rtable.h,v 1.3 2015/09/04 08:43:39 mpi Exp $ */ +/* $OpenBSD: rtable.h,v 1.4 2015/09/28 08:36:24 mpi Exp $ */ /* * Copyright (c) 2014-2015 Martin Pieuchot @@ -69,6 +69,6 @@ struct rtentry *rtable_mpath_match(unsigned int, struct rtentry *, struct sockaddr *, uint8_t); int rtable_mpath_conflict(unsigned int, struct sockaddr *, struct sockaddr *, struct sockaddr *, uint8_t, int); -struct rtentry *rtable_mpath_select(struct rtentry *, uint32_t *); +struct rtentry *rtable_mpath_select(struct rtentry *, uint32_t); void rtable_mpath_reprio(struct rtentry *, uint8_t); #endif /* _NET_RTABLE_H_ */ diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index 86d6d8e8c11..e62a6497db8 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_carp.c,v 1.272 2015/09/27 04:27:57 dlg Exp $ */ +/* $OpenBSD: ip_carp.c,v 1.273 2015/09/28 08:36:24 mpi Exp $ */ /* * Copyright (c) 2002 Michael Shalayeff. All rights reserved. @@ -1298,20 +1298,19 @@ carp_send_na(struct carp_softc *sc) #endif /* INET6 */ /* - * Based on bridge_hash() in if_bridge.c + * Originated from bridge_hash() in if_bridge.c */ -#define mix(a,b,c) \ - do { \ - a -= b; a -= c; a ^= (c >> 13); \ - b -= c; b -= a; b ^= (a << 8); \ - c -= a; c -= b; c ^= (b >> 13); \ - a -= b; a -= c; a ^= (c >> 12); \ - b -= c; b -= a; b ^= (a << 16); \ - c -= a; c -= b; c ^= (b >> 5); \ - a -= b; a -= c; a ^= (c >> 3); \ - b -= c; b -= a; b ^= (a << 10); \ - c -= a; c -= b; c ^= (b >> 15); \ - } while (0) +#define mix(a, b, c) do { \ + a -= b; a -= c; a ^= (c >> 13); \ + b -= c; b -= a; b ^= (a << 8); \ + c -= a; c -= b; c ^= (b >> 13); \ + a -= b; a -= c; a ^= (c >> 12); \ + b -= c; b -= a; b ^= (a << 16); \ + c -= a; c -= b; c ^= (b >> 5); \ + a -= b; a -= c; a ^= (c >> 3); \ + b -= c; b -= a; b ^= (a << 10); \ + c -= a; c -= b; c ^= (b >> 15); \ +} while (0) u_int32_t carp_hash(struct carp_softc *sc, u_char *src) |