diff options
author | Jun-ichiro itojun Hagino <itojun@cvs.openbsd.org> | 2004-04-25 02:48:05 +0000 |
---|---|---|
committer | Jun-ichiro itojun Hagino <itojun@cvs.openbsd.org> | 2004-04-25 02:48:05 +0000 |
commit | c6b0397c7d1701e68dfbb8b0106f79196a34d424 (patch) | |
tree | 6665fdcb3ed2813f738a28f1185b681f9484fcfb /sys/net | |
parent | 8656adaef3dd7fc8fa5dbfea91bc95376b2bc07b (diff) |
radix tree with multipath support. from kame. deraadt ok
user visible changes:
- you can add multiple routes with same key (route add A B then route add A C)
- you have to specify gateway address if there are multiple entries on the table
(route delete A B, instead of route delete A)
kernel change:
- radix_node_head has an extra entry
- rnh_deladdr takes extra argument
TODO:
- actually take advantage of multipath (rtalloc -> rtalloc_mpath)
Diffstat (limited to 'sys/net')
-rw-r--r-- | sys/net/pf_table.c | 6 | ||||
-rw-r--r-- | sys/net/radix.c | 52 | ||||
-rw-r--r-- | sys/net/radix.h | 8 | ||||
-rw-r--r-- | sys/net/radix_mpath.c | 274 | ||||
-rw-r--r-- | sys/net/radix_mpath.h | 57 | ||||
-rw-r--r-- | sys/net/route.c | 30 | ||||
-rw-r--r-- | sys/net/route.h | 3 | ||||
-rw-r--r-- | sys/net/rtsock.c | 35 |
8 files changed, 453 insertions, 12 deletions
diff --git a/sys/net/pf_table.c b/sys/net/pf_table.c index 148b85ec8c1..a6c303c4dbf 100644 --- a/sys/net/pf_table.c +++ b/sys/net/pf_table.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_table.c,v 1.48 2004/04/09 19:30:41 frantzen Exp $ */ +/* $OpenBSD: pf_table.c,v 1.49 2004/04/25 02:48:03 itojun Exp $ */ /* * Copyright (c) 2002 Cedric Berger @@ -937,9 +937,9 @@ pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) s = splsoftnet(); if (KENTRY_NETWORK(ke)) { pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); - rn = rn_delete(&ke->pfrke_sa, &mask, head); + rn = rn_delete(&ke->pfrke_sa, &mask, head, NULL); } else - rn = rn_delete(&ke->pfrke_sa, NULL, head); + rn = rn_delete(&ke->pfrke_sa, NULL, head, NULL); splx(s); if (rn == NULL) { diff --git a/sys/net/radix.c b/sys/net/radix.c index 97a85d7c49f..cd557aacd18 100644 --- a/sys/net/radix.c +++ b/sys/net/radix.c @@ -1,4 +1,4 @@ -/* $OpenBSD: radix.c,v 1.16 2004/04/25 01:38:10 brad Exp $ */ +/* $OpenBSD: radix.c,v 1.17 2004/04/25 02:48:03 itojun Exp $ */ /* $NetBSD: radix.c,v 1.20 2003/08/07 16:32:56 agc Exp $ */ /* @@ -50,6 +50,10 @@ #include <net/radix.h> #endif +#ifndef SMALL_KERNEL +#include <net/radix_mpath.h> +#endif + int max_keylen; struct radix_mask *rn_mkfreelist; struct radix_node_head *mask_rnhead; @@ -554,6 +558,21 @@ rn_addroute(v_arg, n_arg, head, treenodes) saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); if (keyduplicated) { for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { +#ifndef SMALL_KERNEL + /* permit multipath, if enabled for the family */ + if (rn_mpath_capable(head) && netmask == tt->rn_mask) { + /* + * go down to the end of multipaths, so that + * new entry goes into the end of rn_dupedkey + * chain. + */ + do { + t = tt; + tt = tt->rn_dupedkey; + } while (tt && t->rn_mask == tt->rn_mask); + break; + } +#endif if (tt->rn_mask == netmask) return (0); if (netmask == 0 || @@ -684,20 +703,39 @@ on2: } struct radix_node * -rn_delete(v_arg, netmask_arg, head) +rn_delete(v_arg, netmask_arg, head, rn) void *v_arg, *netmask_arg; struct radix_node_head *head; + struct radix_node *rn; { struct radix_node *t, *p, *x, *tt; struct radix_mask *m, *saved_m, **mp; struct radix_node *dupedkey, *saved_tt, *top; caddr_t v, netmask; int b, head_off, vlen; +#ifndef SMALL_KERNEL + int mpath_enable = 0; +#endif v = v_arg; netmask = netmask_arg; x = head->rnh_treetop; +#ifndef SMALL_KERNEL + if (rn && (rn->rn_mask != rn_zeros)) { + tt = rn; + /* + * Is this route(rn) a rn->dupedkey chain? + * Only default route is an exception. (rn_mask) + */ + if (rn_mpath_next(tt->rn_p)) + mpath_enable = 1; + else + tt = rn_search(v, x); + } else + tt = rn_search(v, x); +#else tt = rn_search(v, x); +#endif head_off = x->rn_off; vlen = *(u_char *)v; saved_tt = tt; @@ -806,6 +844,16 @@ on1: } goto out; } +#ifndef SMALL_KERNEL + if (mpath_enable) { + /* + * my parent dupedkey is NULL + * end of mpath route. + */ + t->rn_dupedkey = NULL; + goto out; + } +#endif if (t->rn_l == tt) x = t->rn_r; else diff --git a/sys/net/radix.h b/sys/net/radix.h index 1ebe63d3978..661858abd36 100644 --- a/sys/net/radix.h +++ b/sys/net/radix.h @@ -1,4 +1,4 @@ -/* $OpenBSD: radix.h,v 1.11 2004/04/25 01:38:10 brad Exp $ */ +/* $OpenBSD: radix.h,v 1.12 2004/04/25 02:48:03 itojun Exp $ */ /* $NetBSD: radix.h,v 1.8 1996/02/13 22:00:37 christos Exp $ */ /* @@ -114,7 +114,7 @@ struct radix_node_head { struct radix_node_head *head, struct radix_node nodes[]); /* remove based on sockaddr */ struct radix_node *(*rnh_deladdr)(void *v, void *mask, - struct radix_node_head *head); + struct radix_node_head *head, struct radix_node *rn); /* remove based on packet hdr */ struct radix_node *(*rnh_delpkt)(void *v, void *mask, struct radix_node_head *head); @@ -131,6 +131,7 @@ struct radix_node_head { int (*rnh_walktree)(struct radix_node_head *, int (*)(struct radix_node *, void *), void *); struct radix_node rnh_nodes[3];/* empty tree for common case */ + int rnh_multipath; /* multipath? */ }; @@ -159,7 +160,8 @@ struct radix_node *rn_addmask(void *, int, int), *rn_addroute(void *, void *, struct radix_node_head *, struct radix_node [2]), - *rn_delete(void *, void *, struct radix_node_head *), + *rn_delete(void *, void *, struct radix_node_head *, + struct radix_node *), *rn_insert(void *, struct radix_node_head *, int *, struct radix_node [2]), *rn_lookup(void *, void *, struct radix_node_head *), diff --git a/sys/net/radix_mpath.c b/sys/net/radix_mpath.c new file mode 100644 index 00000000000..e835b54a703 --- /dev/null +++ b/sys/net/radix_mpath.c @@ -0,0 +1,274 @@ +/* $OpenBSD: radix_mpath.c,v 1.1 2004/04/25 02:48:03 itojun Exp $ */ +/* $KAME: radix_mpath.c,v 1.13 2002/10/28 21:05:59 itojun Exp $ */ + +/* + * Copyright (C) 2001 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE + * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS + * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL + * PROPERTIES. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/socket.h> +#define M_DONTWAIT M_NOWAIT +#include <sys/domain.h> +#include <sys/syslog.h> +#include <net/radix.h> +#include <net/radix_mpath.h> +#include <net/route.h> +#include <dev/rndvar.h> + +/* + * give some jitter to hash, to avoid synchronization between routers + */ +static u_int32_t hashjitter; + +int +rn_mpath_capable(rnh) + struct radix_node_head *rnh; +{ + + return rnh->rnh_multipath; +} + +struct radix_node * +rn_mpath_next(rn) + struct radix_node *rn; +{ + struct radix_node *next; + + if (!rn->rn_dupedkey) + return NULL; + next = rn->rn_dupedkey; + if (rn->rn_mask == next->rn_mask) + return next; + else + return NULL; +} + +int +rn_mpath_count(rn) + struct radix_node *rn; +{ + int i; + + i = 1; + while ((rn = rn_mpath_next(rn)) != NULL) + i++; + return i; +} + +struct rtentry * +rt_mpath_matchgate(rt, gate) + struct rtentry *rt; + struct sockaddr *gate; +{ + struct radix_node *rn; + + if (!rn_mpath_next((struct radix_node *)rt)) + return rt; + + if (!gate) + return NULL; + /* beyond here, we use rn as the master copy */ + rn = (struct radix_node *)rt; + do { + rt = (struct rtentry *)rn; + if (rt->rt_gateway->sa_len == gate->sa_len && + !memcmp(rt->rt_gateway, gate, gate->sa_len)) + break; + } while ((rn = rn_mpath_next(rn)) != NULL); + if (!rn) + return NULL; + + return (struct rtentry *)rn; +} + +/* + * check if we have the same key/mask/gateway on the table already. + */ +int +rt_mpath_conflict(rnh, rt, netmask) + struct radix_node_head *rnh; + struct rtentry *rt; + struct sockaddr *netmask; +{ + struct radix_node *rn, *rn1; + struct rtentry *rt1; + char *p, *q, *eq; + int same, l, skip; + + rn = (struct radix_node *)rt; + rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh); + if (!rn1 || rn1->rn_flags & RNF_ROOT) + return 0; + + /* + * unlike other functions we have in this file, we have to check + * all key/mask/gateway as rnh_lookup can match less specific entry. + */ + rt1 = (struct rtentry *)rn1; + + /* compare key. */ + if (rt_key(rt1)->sa_len != rt_key(rt)->sa_len || + bcmp(rt_key(rt1), rt_key(rt), rt_key(rt1)->sa_len)) + goto different; + + /* key was the same. compare netmask. hairy... */ + if (rt_mask(rt1) && netmask) { + skip = rnh->rnh_treetop->rn_off; + if (rt_mask(rt1)->sa_len > netmask->sa_len) { + /* + * as rt_mask(rt1) is made optimal by radix.c, + * there must be some 1-bits on rt_mask(rt1) + * after netmask->sa_len. therefore, in + * this case, the entries are different. + */ + if (rt_mask(rt1)->sa_len > skip) + goto different; + else { + /* no bits to compare, i.e. same*/ + goto maskmatched; + } + } + + l = rt_mask(rt1)->sa_len; + if (skip > l) { + /* no bits to compare, i.e. same */ + goto maskmatched; + } + p = (char *)rt_mask(rt1); + q = (char *)netmask; + if (bcmp(p + skip, q + skip, l - skip)) + goto different; + /* + * need to go through all the bit, as netmask is not + * optimal and can contain trailing 0s + */ + eq = (char *)netmask + netmask->sa_len; + q += l; + same = 1; + while (eq > q) + if (*q++) { + same = 0; + break; + } + if (!same) + goto different; + } else if (!rt_mask(rt1) && !netmask) + ; /* no mask to compare, i.e. same */ + else { + /* one has mask and the other does not, different */ + goto different; + } + + maskmatched:; + + /* key/mask were the same. compare gateway for all multipaths */ + do { + rt1 = (struct rtentry *)rn1; + + /* sanity: no use in comparing the same thing */ + if (rn1 == rn) + continue; + + if (rt1->rt_gateway->sa_len != rt->rt_gateway->sa_len || + bcmp(rt1->rt_gateway, rt->rt_gateway, + rt1->rt_gateway->sa_len)) + continue; + + /* all key/mask/gateway are the same. conflicting entry. */ + return EEXIST; + } while ((rn1 = rn_mpath_next(rn1)) != NULL); + + different: + return 0; +} + +void +rtalloc_mpath(ro, hash) + struct route *ro; + int hash; +{ + struct radix_node *rn0, *rn; + int n; + + /* + * XXX we don't attempt to lookup cached route again; what should + * be done for sendto(3) case? + */ + if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)) + return; /* XXX */ + ro->ro_rt = rtalloc1(&ro->ro_dst, 1); + /* if the route does not exist or it is not multipath, don't care */ + if (!ro->ro_rt || !rn_mpath_next((struct radix_node *)ro->ro_rt)) + return; + + /* beyond here, we use rn as the master copy */ + rn0 = rn = (struct radix_node *)ro->ro_rt; + n = rn_mpath_count(rn0); + + /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */ + hash += hashjitter; + hash %= n; + while (hash-- > 0 && rn) { + /* stay within the multipath routes */ + if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask) + break; + rn = rn->rn_dupedkey; + } + + /* XXX try filling rt_gwroute and avoid unreachable gw */ + + /* if gw selection fails, use the first match (default) */ + if (!rn) + return; + + rtfree(ro->ro_rt); + ro->ro_rt = (struct rtentry *)rn; + ro->ro_rt->rt_refcnt++; +} + +int +rn_mpath_inithead(head, off) + void **head; + int off; +{ + struct radix_node_head *rnh; + + hashjitter = arc4random(); + if (rn_inithead(head, off) == 1) { + rnh = (struct radix_node_head *)*head; + rnh->rnh_multipath = 1; + return 1; + } else + return 0; +} diff --git a/sys/net/radix_mpath.h b/sys/net/radix_mpath.h new file mode 100644 index 00000000000..f3e658207de --- /dev/null +++ b/sys/net/radix_mpath.h @@ -0,0 +1,57 @@ +/* $OpenBSD: radix_mpath.h,v 1.1 2004/04/25 02:48:03 itojun Exp $ */ +/* $KAME: radix_mpath.h,v 1.9 2004/03/30 11:21:49 keiichi Exp $ */ + +/* + * Copyright (C) 2001 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE + * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS + * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL + * PROPERTIES. + */ + +#ifndef _NET_RADIX_MPATH_H_ +#define _NET_RADIX_MPATH_H_ + +#ifdef _KERNEL +/* + * Radix tree API with multipath support + */ +struct route; +struct rtentry; +struct sockaddr; +int rn_mpath_capable __P((struct radix_node_head *)); +struct radix_node *rn_mpath_next __P((struct radix_node *)); +int rn_mpath_count __P((struct radix_node *)); +struct rtentry *rt_mpath_matchgate __P((struct rtentry *, struct sockaddr *)); +int rt_mpath_conflict __P((struct radix_node_head *, struct rtentry *, + struct sockaddr *)); +void rtalloc_mpath __P((struct route *, int)); +int rn_mpath_inithead __P((void **, int)); +#endif + +#endif /* _NET_RADIX_MPATH_H_ */ diff --git a/sys/net/route.c b/sys/net/route.c index 6cd04dc9b94..06fa17d9dcc 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -1,4 +1,4 @@ -/* $OpenBSD: route.c,v 1.39 2003/12/10 07:22:42 itojun Exp $ */ +/* $OpenBSD: route.c,v 1.40 2004/04/25 02:48:04 itojun Exp $ */ /* $NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $ */ /* @@ -672,7 +672,22 @@ rtrequest1(req, info, ret_nrt) netmask = 0; switch (req) { case RTM_DELETE: - if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == NULL) + if ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL) + senderr(ESRCH); + rt = (struct rtentry *)rn; +#ifndef SMALL_KERNEL + /* + * if we got multipath routes, we require users to specify + * a matching RTAX_GATEWAY. + */ + if (rn_mpath_capable(rnh)) { + rt = rt_mpath_matchgate(rt, gateway); + rn = (struct radix_node *)rt; + if (!rt) + senderr(ESRCH); + } +#endif + if ((rn = rnh->rnh_deladdr(dst, netmask, rnh, rn)) == NULL) senderr(ESRCH); rt = (struct rtentry *)rn; if ((rt->rt_flags & RTF_CLONING) != 0) { @@ -735,6 +750,17 @@ rtrequest1(req, info, ret_nrt) rt_maskedcopy(dst, ndst, netmask); } else Bcopy(dst, ndst, dst->sa_len); +#ifndef SMALL_KERNEL + /* do not permit exactly the same dst/mask/gw pair */ + if (rn_mpath_capable(rnh) && + rt_mpath_conflict(rnh, rt, netmask)) { + if (rt->rt_gwroute) + rtfree(rt->rt_gwroute); + Free(rt_key(rt)); + Free(rt); + senderr(EEXIST); + } +#endif ifa->ifa_refcnt++; rt->rt_ifa = ifa; rt->rt_ifp = ifa->ifa_ifp; diff --git a/sys/net/route.h b/sys/net/route.h index b18a5675d93..963fdd94a30 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -1,4 +1,4 @@ -/* $OpenBSD: route.h,v 1.19 2004/01/15 10:47:55 markus Exp $ */ +/* $OpenBSD: route.h,v 1.20 2004/04/25 02:48:04 itojun Exp $ */ /* $NetBSD: route.h,v 1.9 1996/02/13 22:00:49 christos Exp $ */ /* @@ -89,6 +89,7 @@ struct rt_metrics { */ #ifndef RNF_NORMAL #include <net/radix.h> +#include <net/radix_mpath.h> #endif struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index c55bf82fae2..fce8e94dc74 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rtsock.c,v 1.35 2004/01/15 10:47:55 markus Exp $ */ +/* $OpenBSD: rtsock.c,v 1.36 2004/04/25 02:48:04 itojun Exp $ */ /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ /* @@ -270,7 +270,40 @@ route_output(struct mbuf *m, ...) senderr(ESRCH); } rt = (struct rtentry *)rn; +#ifndef SMALL_KERNEL + /* + * for RTM_CHANGE/LOCK, if we got multipath routes, + * we require users to specify a matching RTAX_GATEWAY. + * + * for RTM_GET, gate is optional even with multipath. + * if gate == NULL the first match is returned. + * (no need to call rt_mpath_matchgate if gate == NULL) + */ + if (rn_mpath_capable(rnh) && + (rtm->rtm_type != RTM_GET || gate)) { + rt = rt_mpath_matchgate(rt, gate); + rn = (struct radix_node *)rt; + if (!rt) + senderr(ESRCH); + } +#endif rt->rt_refcnt++; + if (rtm->rtm_type != RTM_GET) {/* XXX: too grotty */ + struct radix_node *rn; + extern struct radix_node_head *mask_rnhead; + + if (Bcmp(dst, rt_key(rt), dst->sa_len) != 0) + senderr(ESRCH); + if (netmask && (rn = rn_search(netmask, + mask_rnhead->rnh_treetop))) + netmask = (struct sockaddr *)rn->rn_key; + for (rn = rt->rt_nodes; rn; rn = rn->rn_dupedkey) + if (netmask == (struct sockaddr *)rn->rn_mask) + break; + if (rn == 0) + senderr(ETOOMANYREFS); + rt = (struct rtentry *)rn; + } switch (rtm->rtm_type) { |