diff options
author | Claudio Jeker <claudio@cvs.openbsd.org> | 2004-01-10 16:20:30 +0000 |
---|---|---|
committer | Claudio Jeker <claudio@cvs.openbsd.org> | 2004-01-10 16:20:30 +0000 |
commit | d8df3d2156171ba2242d297a4dbb9363795d506e (patch) | |
tree | c65ff19e7c8cee3ff128e4d5cb68f0b9b35026ca | |
parent | 67805e42d42678d6bd124985db26f5eef7e82a3d (diff) |
RDE update generation. First we queue all updates and withdraws on a
per-peer basis. A queue runner then dequeues those entries, packages them
into valid BGP UPDATE messages, and sends them to the SE.
Not yet done is per-peer-type attribute handling (such as AS path prepends
and nexthop modifications), and the queue runner could be a tad smarter. All
in all, this gives us a good starting point for the missing parts.
OK henning@
-rw-r--r-- | usr.sbin/bgpd/mrt.c | 92 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde.c | 78 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde.h | 77 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde_decide.c | 415 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde_rib.c | 268 |
5 files changed, 755 insertions, 175 deletions
diff --git a/usr.sbin/bgpd/mrt.c b/usr.sbin/bgpd/mrt.c index a2a029e0276..4e68ae09e12 100644 --- a/usr.sbin/bgpd/mrt.c +++ b/usr.sbin/bgpd/mrt.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mrt.c,v 1.19 2004/01/07 12:34:23 claudio Exp $ */ +/* $OpenBSD: mrt.c,v 1.20 2004/01/10 16:20:29 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -38,10 +38,13 @@ * XXX imsg_create(), imsg_add(), imsg_close() ... */ -static int mrt_dump_entry(struct mrt_config *, struct prefix *, u_int16_t, - struct peer_config *); -static int mrt_dump_header(struct buf *, u_int16_t, u_int16_t, u_int32_t); -static int mrt_open(struct mrt *); +static u_int16_t mrt_attr_length(struct attr_flags *); +static int mrt_attr_dump(void *, u_int16_t, struct attr_flags *); +static int mrt_dump_entry(struct mrt_config *, struct prefix *, + u_int16_t, struct peer_config *); +static int mrt_dump_header(struct buf *, u_int16_t, u_int16_t, + u_int32_t); +static int mrt_open(struct mrt *); #define DUMP_BYTE(x, b) \ do { \ @@ -196,6 +199,79 @@ mrt_dump_state(struct mrt_config *mrt, u_int16_t old_state, u_int16_t new_state, } +static u_int16_t +mrt_attr_length(struct attr_flags *a) +{ + struct attr *oa; + u_int16_t alen, plen; + + alen = 4 /* origin */ + 7 /* nexthop */ + 7 /* lpref */; + plen = aspath_length(a->aspath); + alen += 2 + plen + (plen > 255 ? 2 : 1); + if (a->med != 0) + alen += 7; + + TAILQ_FOREACH(oa, &a->others, attr_l) + alen += 2 + oa->len + (oa->len > 255 ? 
2 : 1); + + return alen; +} + +static int +mrt_attr_dump(void *p, u_int16_t len, struct attr_flags *a) +{ + struct attr *oa; + u_char *buf = p; + u_int32_t tmp32; + int r; + u_int16_t aslen, wlen = 0; + + /* origin */ + if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN, ATTR_ORIGIN, + &a->origin, 1)) == -1) + return (-1); + wlen += r; len -= r; + + /* aspath */ + aslen = aspath_length(a->aspath); + if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN, ATTR_ASPATH, + aspath_dump(a->aspath), aslen)) == -1) + return (-1); + wlen += r; len -= r; + + /* nexthop, already network byte order */ + if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN, ATTR_NEXTHOP, + &a->nexthop, 4)) == -1) + return (-1); + wlen += r; len -= r; + + /* MED, non transitive */ + if (a->med != 0) { + tmp32 = htonl(a->med); + if ((r = attr_write(buf + wlen, len, ATTR_OPTIONAL, ATTR_MED, + &tmp32, 4)) == -1) + return (-1); + wlen += r; len -= r; + } + + /* local preference, only valid for ibgp */ + tmp32 = htonl(a->lpref); + if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN, ATTR_LOCALPREF, + &tmp32, 4)) == -1) + return (-1); + wlen += r; len -= r; + + /* dump all other path attributes without modification */ + TAILQ_FOREACH(oa, &a->others, attr_l) { + if ((r = attr_write(buf + wlen, len, oa->flags, oa->type, + oa->data, oa->len)) == -1) + return (-1); + wlen += r; len -= r; + } + + return (wlen); +} + static int mrt_dump_entry(struct mrt_config *mrt, struct prefix *p, u_int16_t snum, struct peer_config *peer) @@ -206,7 +282,7 @@ mrt_dump_entry(struct mrt_config *mrt, struct prefix *p, u_int16_t snum, u_int16_t len, attr_len; int n; - attr_len = attr_length(&p->aspath->flags); + attr_len = mrt_attr_length(&p->aspath->flags); len = MRT_DUMP_HEADER_SIZE + attr_len; hdr.len = len + IMSG_HEADER_SIZE + MRT_HEADER_SIZE; @@ -244,8 +320,8 @@ mrt_dump_entry(struct mrt_config *mrt, struct prefix *p, u_int16_t snum, return (-1); } - if (attr_dump(bptr, attr_len, &p->aspath->flags) == -1) { - 
logit(LOG_ERR, "mrt_dump_entry: attr_dump error"); + if (mrt_attr_dump(bptr, attr_len, &p->aspath->flags) == -1) { + logit(LOG_ERR, "mrt_dump_entry: mrt_attr_dump error"); buf_free(buf); return (-1); } diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c index b6a90c2609d..84614366952 100644 --- a/usr.sbin/bgpd/rde.c +++ b/usr.sbin/bgpd/rde.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.c,v 1.50 2004/01/07 12:38:36 claudio Exp $ */ +/* $OpenBSD: rde.c,v 1.51 2004/01/10 16:20:29 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -42,11 +42,13 @@ int rde_update_dispatch(struct imsg *); int rde_update_get_prefix(u_char *, u_int16_t, struct in_addr *, u_int8_t *); void init_attr_flags(struct attr_flags *); -int rde_update_get_attr(u_char *, u_int16_t, struct attr_flags *); +int rde_update_get_attr(struct rde_peer *, u_char *, u_int16_t, + struct attr_flags *); void rde_update_err(u_int32_t, enum suberr_update); void rde_update_log(const char *, const struct rde_peer *, const struct attr_flags *, const struct in_addr *, u_int8_t); +void rde_update_queue_runner(void); void peer_init(struct peer *, u_long); struct rde_peer *peer_add(u_int32_t, struct peer_config *); @@ -166,6 +168,7 @@ rde_main(struct bgpd_config *config, struct peer *peer_l, int pipe_m2r[2], nfds--; rde_dispatch_imsg_session(&ibuf_se); } + rde_update_queue_runner(); } logit(LOG_INFO, "route decision engine exiting"); @@ -364,7 +367,8 @@ rde_update_dispatch(struct imsg *imsg) init_attr_flags(&attrs); while (attrpath_len > 0) { - if ((pos = rde_update_get_attr(p, attrpath_len, &attrs)) < 0) { + if ((pos = rde_update_get_attr(peer, p, attrpath_len, + &attrs)) < 0) { rde_update_err(peer->conf.id, ERR_UPD_ATTRLIST); return (-1); } @@ -436,15 +440,16 @@ init_attr_flags(struct attr_flags *a) { bzero(a, sizeof(struct attr_flags)); a->origin = ORIGIN_INCOMPLETE; + TAILQ_INIT(&a->others); } int -rde_update_get_attr(u_char *p, u_int16_t len, struct attr_flags *a) 
+rde_update_get_attr(struct rde_peer *peer, u_char *p, u_int16_t len, + struct attr_flags *a) { u_int32_t tmp32; u_int16_t attr_len; u_int16_t plen = 0; - u_int16_t tmp16; u_int8_t flags; u_int8_t type; u_int8_t tmp8; @@ -502,23 +507,18 @@ rde_update_get_attr(u_char *p, u_int16_t len, struct attr_flags *a) case ATTR_LOCALPREF: if (attr_len != 4) return (-1); + if (peer->conf.ebgp) { + /* ignore local-pref attr for non ibgp peers */ + a->lpref = 0; /* set a default value */ + break; + } UPD_READ(&tmp32, p, plen, 4); a->lpref = ntohl(tmp32); break; case ATTR_ATOMIC_AGGREGATE: - if (attr_len > 0) - return (-1); - a->aggr_atm = 1; - break; case ATTR_AGGREGATOR: - if (attr_len != 6) - return (-1); - UPD_READ(&tmp16, p, plen, 2); - a->aggr_as = ntohs(tmp16); - UPD_READ(&a->aggr_ip, p, plen, 4); /*network byte order */ - break; default: - /* ignore for now */ + attr_optadd(a, flags, type, p, attr_len); plen += attr_len; break; } @@ -618,6 +618,49 @@ rde_send_nexthop(in_addr_t next, int valid) fatal("imsg_compose error"); } +u_char queue_buf[4096]; + +void +rde_update_queue_runner(void) +{ + struct rde_peer *peer; + int r, sent; + u_int16_t len, wd_len, wpos; + + len = sizeof(queue_buf) - MSGSIZE_HEADER; + do { + sent = 0; + LIST_FOREACH(peer, &peerlist, peer_l) { + if (peer->state != PEER_UP) + continue; + /* first withdraws */ + wpos = 2; + r = up_dump_prefix(queue_buf + wpos, len - wpos, + &peer->withdraws, peer); + wd_len = r; + wd_len = htons(wd_len); + memcpy(queue_buf, &wd_len, 2); + wpos += r; + + /* now bgp path attributes */ + r = up_dump_attrnlri(queue_buf + wpos, len - wpos, + peer); + wpos += r; + + if (wpos == 2) + /* no packet to send */ + continue; + + /* finally send message to SE */ + if (imsg_compose(&ibuf_se, IMSG_UPDATE, peer->conf.id, + queue_buf, wpos) == -1) + fatal("imsg_compose error"); + sent++; + } + } while (sent != 0); +} + + /* * peer functions */ @@ -688,6 +731,7 @@ peer_add(u_int32_t id, struct peer_config *p_conf) memcpy(&peer->conf, 
p_conf, sizeof(struct peer_config)); peer->remote_bgpid = 0; peer->state = PEER_NONE; + up_init(peer); head = PEER_HASH(id); ENSURE(head != NULL); @@ -731,6 +775,7 @@ peer_up(u_int32_t id, u_int32_t rid) } peer->remote_bgpid = ntohl(rid); peer->state = PEER_UP; + up_init(peer); } void @@ -746,6 +791,7 @@ peer_down(u_int32_t id) } peer->remote_bgpid = 0; peer->state = PEER_DOWN; + up_down(peer); /* walk through per peer RIB list and remove all prefixes. */ for (asp = LIST_FIRST(&peer->path_h); diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h index 6e329e678d7..b151c31553b 100644 --- a/usr.sbin/bgpd/rde.h +++ b/usr.sbin/bgpd/rde.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.h,v 1.11 2004/01/06 10:51:14 claudio Exp $ */ +/* $OpenBSD: rde.h,v 1.12 2004/01/10 16:20:29 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> and @@ -21,6 +21,7 @@ #include <sys/types.h> #include <sys/queue.h> +#include <sys/tree.h> #include "bgpd.h" @@ -48,14 +49,27 @@ enum peer_state { */ LIST_HEAD(rde_peer_head, rde_peer); LIST_HEAD(aspath_head, rde_aspath); +RB_HEAD(uptree_prefix, update_prefix); +RB_HEAD(uptree_attr, update_attr); +TAILQ_HEAD(uplist_prefix, update_prefix); +TAILQ_HEAD(uplist_attr, update_attr); struct rde_peer { - LIST_ENTRY(rde_peer) hash_l; /* hash list over all peers */ - LIST_ENTRY(rde_peer) peer_l; /* list of all peers */ - struct aspath_head path_h; /* list of all as paths */ - struct peer_config conf; - u_int32_t remote_bgpid; - enum peer_state state; + LIST_ENTRY(rde_peer) hash_l; /* hash list over all peers */ + LIST_ENTRY(rde_peer) peer_l; /* list of all peers */ + struct aspath_head path_h; /* list of all as paths */ + struct peer_config conf; + u_int32_t remote_bgpid; + enum peer_state state; + struct in_addr if_ip; /* nexthop for announcements*/ + u_int32_t up_pcnt; + u_int32_t up_acnt; + u_int32_t up_nlricnt; + u_int32_t up_wcnt; + struct uptree_prefix up_prefix; + struct uptree_attr up_attrs; + struct uplist_attr updates; + struct 
uplist_prefix withdraws; }; #define AS_SET 1 @@ -114,29 +128,29 @@ enum attrtypes { #define ATTR_OPTIONAL 0x80 /* default attribute flags for well known attributes */ -#define ATTR_ORIGIN_FLAGS ATTR_TRANSITIVE -#define ATTR_NEXTHOP_FLAGS ATTR_TRANSITIVE -#define ATTR_MED_FLAGS ATTR_OPTIONAL -#define ATTR_LOCALPREF_FLAGS ATTR_TRANSITIVE -#define ATTR_ATOMIC_AGGREGATE_FLAGS ATTR_TRANSITIVE -#define ATTR_AGGREGATOR_FLAGS (ATTR_OPTIONAL | ATTR_TRANSITIVE) - -enum origins { - ORIGIN_IGP, - ORIGIN_EGP, - ORIGIN_INCOMPLETE +#define ATTR_WELL_KNOWN ATTR_TRANSITIVE + +struct attr { + u_int8_t flags; + u_int8_t type; + u_int16_t len; + u_char *data; + TAILQ_ENTRY(attr) attr_l; }; +TAILQ_HEAD(attr_list, attr); + +#define ORIGIN_IGP 0 +#define ORIGIN_EGP 1 +#define ORIGIN_INCOMPLETE 2 + struct attr_flags { - enum origins origin; struct aspath *aspath; - struct astags *astags; struct in_addr nexthop; /* exit nexthop */ u_int32_t med; /* multi exit disc */ u_int32_t lpref; /* local pref */ - u_int8_t aggr_atm; /* atomic aggregate */ - u_int16_t aggr_as; /* aggregator as */ - struct in_addr aggr_ip; /* aggregator ip */ + u_int8_t origin; + struct attr_list others; }; enum nexthop_state { @@ -206,10 +220,12 @@ void rde_send_kroute(struct prefix *, struct prefix *); void rde_send_nexthop(in_addr_t, int); /* rde_rib.c */ -int attr_equal(struct attr_flags *, struct attr_flags *); +int attr_compare(struct attr_flags *, struct attr_flags *); void attr_copy(struct attr_flags *, struct attr_flags *); -u_int16_t attr_length(struct attr_flags *); -int attr_dump(void *, u_int16_t, struct attr_flags *); +int attr_write(void *, u_int16_t, u_int8_t, u_int8_t, void *, + u_int16_t); +void attr_optadd(struct attr_flags *, u_int8_t, u_int8_t, + u_char *, u_int16_t); int aspath_verify(void *, u_int16_t, u_int16_t); #define AS_ERR_LEN -1 @@ -222,11 +238,11 @@ u_int16_t aspath_length(struct aspath *); u_int16_t aspath_count(struct aspath *); u_int16_t aspath_neighbour(struct aspath *); u_long 
aspath_hash(struct aspath *); -int aspath_equal(struct aspath *, struct aspath *); +int aspath_compare(struct aspath *, struct aspath *); void path_init(u_long); void path_update(struct rde_peer *, struct attr_flags *, - struct in_addr , int); + struct in_addr , int); struct rde_aspath *path_get(struct aspath *, struct rde_peer *); struct rde_aspath *path_add(struct rde_peer *, struct attr_flags *); void path_remove(struct rde_aspath *); @@ -249,6 +265,11 @@ void nexthop_update(struct kroute_nexthop *); /* rde_decide.c */ void prefix_evaluate(struct prefix *, struct pt_entry *); +void up_init(struct rde_peer *); +void up_down(struct rde_peer *); +int up_dump_prefix(u_char *, int, struct uplist_prefix *, + struct rde_peer *); +int up_dump_attrnlri(u_char *, int, struct rde_peer *); /* rde_prefix.c */ void pt_init(void); diff --git a/usr.sbin/bgpd/rde_decide.c b/usr.sbin/bgpd/rde_decide.c index 77adf20beb9..4e66e58515b 100644 --- a/usr.sbin/bgpd/rde_decide.c +++ b/usr.sbin/bgpd/rde_decide.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_decide.c,v 1.11 2004/01/07 00:01:17 claudio Exp $ */ +/* $OpenBSD: rde_decide.c,v 1.12 2004/01/10 16:20:29 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -20,9 +20,19 @@ #include <sys/types.h> #include <sys/queue.h> +#include <stdlib.h> +#include <string.h> + #include "bgpd.h" #include "ensure.h" #include "rde.h" +#include "session.h" + +int prefix_cmp(struct prefix *, struct prefix *); +void up_generate_updates(struct prefix *, struct prefix *); +int up_generate_attr(struct rde_peer *, struct update_attr *, + struct attr_flags *); +int up_set_prefix(u_char *, int, struct in_addr, u_int8_t); /* * Decision Engine RFC implementation: @@ -106,7 +116,7 @@ * than the prefix p2. p1 should be used for the new prefix and p2 for a * already added prefix. 
*/ -static int +int prefix_cmp(struct prefix *p1, struct prefix *p2) { struct rde_aspath *asp1, *asp2; @@ -216,6 +226,7 @@ prefix_evaluate(struct prefix *p, struct pt_entry *pte) * has an unreachable nexthop */ + up_generate_updates(xp, pte->active); rde_send_kroute(xp, pte->active); if (xp == NULL || xp->aspath->nexthop == NULL || @@ -229,3 +240,403 @@ prefix_evaluate(struct prefix *p, struct pt_entry *pte) } } } + + +/* update stuff. */ +struct update_prefix { + struct in_addr prefix; + int prefixlen; + struct uplist_prefix *prefix_h; + TAILQ_ENTRY(update_prefix) prefix_l; + RB_ENTRY(update_prefix) entry; +}; + +struct update_attr { + u_long attr_hash; + u_char *attr; + u_int16_t attr_len; + struct uplist_prefix prefix_h; + TAILQ_ENTRY(update_attr) attr_l; + RB_ENTRY(update_attr) entry; +}; + +int up_prefix_cmp(struct update_prefix *, struct update_prefix *); +int up_attr_cmp(struct update_attr *, struct update_attr *); +int up_add(struct rde_peer *, struct update_prefix *, struct update_attr *); + +RB_PROTOTYPE(uptree_prefix, update_prefix, entry, up_prefix_cmp); +RB_GENERATE(uptree_prefix, update_prefix, entry, up_prefix_cmp); + +RB_PROTOTYPE(uptree_attr, update_attr, entry, up_attr_cmp); +RB_GENERATE(uptree_attr, update_attr, entry, up_attr_cmp); + +void +up_init(struct rde_peer *peer) +{ + TAILQ_INIT(&peer->updates); + TAILQ_INIT(&peer->withdraws); + RB_INIT(&peer->up_prefix); + RB_INIT(&peer->up_attrs); + peer->up_pcnt = 0; + peer->up_acnt = 0; + peer->up_nlricnt = 0; + peer->up_wcnt = 0; +} + +void +up_down(struct rde_peer *peer) +{ + struct update_attr *ua, *xua; + struct update_prefix *up, *xup; + + for (ua = TAILQ_FIRST(&peer->updates); ua != TAILQ_END(&peer->updates); + ua = xua) { + xua = TAILQ_NEXT(ua, attr_l); + for (up = TAILQ_FIRST(&ua->prefix_h); + up != TAILQ_END(&ua->prefix_h); up = xup) { + xup = TAILQ_NEXT(up, prefix_l); + free(up); + } + free(ua); + } + + for (up = TAILQ_FIRST(&peer->withdraws); + up != TAILQ_END(&peer->withdraws); up = xup) 
{ + xup = TAILQ_NEXT(up, prefix_l); + free(up); + } + + TAILQ_INIT(&peer->updates); + TAILQ_INIT(&peer->withdraws); + RB_INIT(&peer->up_prefix); + RB_INIT(&peer->up_attrs); + + peer->up_pcnt = 0; + peer->up_acnt = 0; + peer->up_nlricnt = 0; + peer->up_wcnt = 0; +} + +int +up_prefix_cmp(struct update_prefix *a, struct update_prefix *b) +{ + if (a->prefix.s_addr < b->prefix.s_addr) + return (-1); + if (a->prefix.s_addr > b->prefix.s_addr) + return (1); + if (a->prefixlen < b->prefixlen) + return (-1); + if (a->prefixlen > b->prefixlen) + return (1); + return (0); +} + +int +up_attr_cmp(struct update_attr *a, struct update_attr *b) +{ + if (a->attr_hash < b->attr_hash) + return (-1); + if (a->attr_hash > b->attr_hash) + return (1); + if (a->attr_len < b->attr_len) + return (-1); + if (a->attr_len > b->attr_len) + return (1); + return memcmp(a->attr, b->attr, a->attr_len); +} + +int +up_add(struct rde_peer *peer, struct update_prefix *p, struct update_attr *a) +{ + struct update_attr *na; + struct update_prefix *np; + + ENSURE(p != NULL); + + /* 1. search for attr */ + if (a != NULL && (na = RB_FIND(uptree_attr, &peer->up_attrs, a)) == + NULL) { + /* 1.1 if not found -> add */ + TAILQ_INIT(&a->prefix_h); + if (RB_INSERT(uptree_attr, &peer->up_attrs, a) != NULL) { + logit(LOG_CRIT, "uptree_attr insert failed"); + return (-1); + } + TAILQ_INSERT_TAIL(&peer->updates, a, attr_l); + peer->up_acnt++; + } else { + /* 1.2 if found -> use that, free a */ + if (a != NULL) { + free(a); + a = na; + /* move to end of update queue */ + TAILQ_REMOVE(&peer->updates, a, attr_l); + TAILQ_INSERT_TAIL(&peer->updates, a, attr_l); + } + } + + /* 2. 
search for prefix */ + if ((np = RB_FIND(uptree_prefix, &peer->up_prefix, p)) == NULL) { + /* 2.1 if not found -> add */ + if (RB_INSERT(uptree_prefix, &peer->up_prefix, p) != NULL) { + logit(LOG_CRIT, "uptree_prefix insert failed"); + return (-1); + } + peer->up_pcnt++; + } else { + /* 2.2 if found -> use that and free p */ + TAILQ_REMOVE(np->prefix_h, np, prefix_l); + free(p); + p = np; + if (p->prefix_h == &peer->withdraws) + peer->up_wcnt--; + else + peer->up_nlricnt--; + } + /* 3. link prefix to attr */ + if (a == NULL) { + TAILQ_INSERT_TAIL(&peer->withdraws, p, prefix_l); + p->prefix_h = &peer->withdraws; + peer->up_wcnt++; + } else { + TAILQ_INSERT_TAIL(&a->prefix_h, p, prefix_l); + p->prefix_h = &a->prefix_h; + peer->up_nlricnt++; + } + return (0); +} + +void +up_generate_updates(struct prefix *new, struct prefix *old) +{ + extern struct rde_peer_head peerlist; + struct rde_peer *peer; + struct update_attr *a; + struct update_prefix *p; + + if ((old == NULL || old->aspath->nexthop == NULL || + old->aspath->nexthop->state != NEXTHOP_REACH) && + (new == NULL || new->aspath->nexthop == NULL || + new->aspath->nexthop->state != NEXTHOP_REACH)) + return; + + LIST_FOREACH(peer, &peerlist, peer_l) { + if (peer->state != PEER_UP) + continue; + /* + * Filtering should be hooked up here. + * With filtering the decision if withdraw, update or nothing + * needs to be done on a per peer basis -- acctually per filter + * set. 
+ */ + + p = calloc(1, sizeof(struct update_prefix)); + if (p == NULL) + fatal("up_queue_update"); + + if (new == NULL || new->aspath->nexthop == NULL || + new->aspath->nexthop->state != NEXTHOP_REACH) { + /* withdraw prefix */ + p->prefix = old->prefix->prefix; + p->prefixlen = old->prefix->prefixlen; + if (up_add(peer, p, NULL) == -1) + logit(LOG_CRIT, "queuing update failed."); + } else { + /* generate update */ + a = calloc(1, sizeof(struct update_attr)); + if (a == NULL) + fatal("up_queue_update"); + + if (up_generate_attr(peer, a, &new->aspath->flags) == + -1) + logit(LOG_CRIT, + "generation of bgp path attributes failed"); + + /* + * use aspath_hash as attr_hash, this may be unoptimal + * but currently I don't care. + */ + a->attr_hash = aspath_hash(new->aspath->flags.aspath); + p->prefix = new->prefix->prefix; + p->prefixlen = new->prefix->prefixlen; + + if (up_add(peer, p, a) == -1) + logit(LOG_CRIT, "queuing update failed."); + } + } +} + +u_char up_attr_buf[4096]; + +int +up_generate_attr(struct rde_peer *peer, struct update_attr *upa, + struct attr_flags *a) +{ + struct attr *oa; + u_int32_t tmp32; + int r; + u_int16_t aslen, len = sizeof(up_attr_buf), wlen = 0; + + /* origin */ + if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN, + ATTR_ORIGIN, &a->origin, 1)) == -1) + return (-1); + wlen += r; len -= r; + + /* aspath */ + /* XXX XXX aspath prepends */ + aslen = aspath_length(a->aspath); + if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN, + ATTR_ASPATH, aspath_dump(a->aspath), aslen)) == -1) + return (-1); + wlen += r; len -= r; + + /* nexthop, already network byte order */ + /* XXX XXX nexthop fixup */ + if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN, + ATTR_NEXTHOP, &a->nexthop, 4)) == -1) + return (-1); + wlen += r; len -= r; + + /* + * The MED of other peers MUST not be announced to others. + * Currently we just dump it. Possibilities are setting the MED via + * a filter or set it to local-pref. 
struct attr_flags probably needs + * a med_in and a med_out field. + */ + + if (peer->conf.ebgp == 0) { + /* local preference, only valid for ibgp */ + tmp32 = htonl(a->lpref); + if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN, + ATTR_LOCALPREF, &tmp32, 4)) == -1) + return (-1); + wlen += r; len -= r; + } + + /* + * dump all other path attributes. Following rules apply: + * 1. well-known attrs: ATTR_ATOMIC_AGGREGATE and ATTR_AGGREGATOR + * pass unmodified (enforce flags to correct values) + * 2. non-transitive attrs: don't re-announce + * 3. transitive known attrs: announce unmodified + * 4. transitive unknown attrs: set partial bit and re-announce + */ + TAILQ_FOREACH(oa, &a->others, attr_l) { + switch (oa->type) { + case ATTR_ATOMIC_AGGREGATE: + if ((r = attr_write(up_attr_buf + wlen, len, + ATTR_WELL_KNOWN, ATTR_ATOMIC_AGGREGATE, + NULL, 0)) == -1) + return (-1); + break; + case ATTR_AGGREGATOR: + if ((r = attr_write(up_attr_buf + wlen, len, + ATTR_OPTIONAL | ATTR_TRANSITIVE, ATTR_AGGREGATOR, + oa->data, oa->len)) == -1) + return (-1); + break; + /* + * currently there are no non-transitive or transitive known + * attributes. 
+ */ + default: + /* unknown attribute */ + if (!(oa->flags & ATTR_OPTIONAL)) + /* somehow a non-transitive slipped through */ + break; + if ((r = attr_write(up_attr_buf + wlen, len, + oa->flags | ATTR_PARTIAL, oa->type, + oa->data, oa->len)) == -1) + return (-1); + break; + } + wlen += r; len -= r; + } + + /* the bgp path attributes are now stored in the global buf */ + upa->attr = malloc(wlen); + if (upa->attr == NULL) + fatal("up_generate_attr"); + memcpy(upa->attr, up_attr_buf, wlen); + upa->attr_len = wlen; + return (wlen); +} + +int +up_set_prefix(u_char *buf, int len, struct in_addr prefix, u_int8_t plen) +{ + int totlen; + + ENSURE(plen <= 32); + totlen = (plen + 7) / 8 + 1; + + if (totlen > len) + return (-1); + *buf++ = plen; + memcpy(buf, &prefix.s_addr, totlen - 1); + return (totlen); +} + +int +up_dump_prefix(u_char *buf, int len, struct uplist_prefix *prefix_head, + struct rde_peer *peer) +{ + struct update_prefix *upp, *xupp; + int r, wpos = 0; + + for (upp = TAILQ_FIRST(prefix_head); + upp != TAILQ_END(prefix_head); upp = xupp) { + xupp = TAILQ_NEXT(upp, prefix_l); + if ((r = up_set_prefix(buf + wpos, len - wpos, + upp->prefix, upp->prefixlen)) == -1) + break; + wpos += r; + if (RB_REMOVE(uptree_prefix, &peer->up_prefix, upp) == NULL) + logit(LOG_CRIT, "dequeuing update failed."); + TAILQ_REMOVE(upp->prefix_h, upp, prefix_l); + peer->up_pcnt--; + if (upp->prefix_h == &peer->withdraws) + peer->up_wcnt--; + else + peer->up_nlricnt--; + free(upp); + } + return (wpos); +} + +int +up_dump_attrnlri(u_char *buf, int len, struct rde_peer *peer) +{ + struct update_attr *upa; + int r, wpos; + u_int16_t attr_len; + + upa = TAILQ_FIRST(&peer->updates); + if (upa == NULL || upa->attr_len + 5 > len) + /* either no packet or not enough space */ + return (0); + + /* first dump the attributes */ + attr_len = htons(upa->attr_len); + memcpy(buf, &attr_len, 2); + wpos = 2; + memcpy(buf + wpos, upa->attr, upa->attr_len); + wpos += upa->attr_len; + + /* now dump the nlri 
*/ + r = up_dump_prefix(buf + wpos, len - wpos, &upa->prefix_h, peer); + wpos += r; + + /* now check if all prefixes where written */ + if (TAILQ_EMPTY(&upa->prefix_h)) { + if (RB_REMOVE(uptree_attr, &peer->up_attrs, upa) == NULL) + logit(LOG_CRIT, "dequeuing update failed."); + TAILQ_REMOVE(&peer->updates, upa, attr_l); + free(upa); + peer->up_acnt--; + } + + return (wpos); +} + diff --git a/usr.sbin/bgpd/rde_rib.c b/usr.sbin/bgpd/rde_rib.c index 9da4a8b6780..097d18e02e6 100644 --- a/usr.sbin/bgpd/rde_rib.c +++ b/usr.sbin/bgpd/rde_rib.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_rib.c,v 1.14 2004/01/06 10:51:14 claudio Exp $ */ +/* $OpenBSD: rde_rib.c,v 1.15 2004/01/10 16:20:29 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -75,135 +75,153 @@ struct rib_stats { #define MAX_PREFIX_PER_AS 1500 /* attribute specific functions */ +void attr_optfree(struct attr_flags *); int -attr_equal(struct attr_flags *a, struct attr_flags *b) +attr_compare(struct attr_flags *a, struct attr_flags *b) { - /* astags not yet used */ - if (a->origin != b->origin || - aspath_equal(a->aspath, b->aspath) == 0 || - a->nexthop.s_addr != b->nexthop.s_addr || - a->med != b->med || - a->lpref != b->lpref || - a->aggr_atm != b->aggr_atm || - a->aggr_as != b->aggr_as || - a->aggr_ip.s_addr != b->aggr_ip.s_addr) - return 0; - return 1; + struct attr *oa, *ob; + int r; + + if (a->origin > b->origin) + return (1); + if (a->origin < b->origin) + return (-1); + if (a->nexthop.s_addr > b->nexthop.s_addr) + return (1); + if (a->nexthop.s_addr < b->nexthop.s_addr) + return (-1); + if (a->med > b->med) + return (1); + if (a->med < b->med) + return (-1); + if (a->lpref > b->lpref) + return (1); + if (a->lpref < b->lpref) + return (-1); + r = aspath_compare(a->aspath, b->aspath); + if (r > 0) + return (1); + if (r < 0) + return (-1); + + for (oa = TAILQ_FIRST(&a->others), ob = TAILQ_FIRST(&b->others); + oa != TAILQ_END(&a->others) && ob != TAILQ_END(&a->others); + oa = 
TAILQ_NEXT(oa, attr_l), ob = TAILQ_NEXT(ob, attr_l)) { + if (oa->type > ob->type) + return (1); + if (oa->type < ob->type) + return (-1); + if (oa->len > ob->len) + return (1); + if (oa->len < ob->len) + return (-1); + r = memcmp(oa->data, ob->data, oa->len); + if (r > 0) + return (1); + if (r < 0) + return (-1); + } + if (oa != TAILQ_END(&a->others)) + return (1); + if (ob != TAILQ_END(&a->others)) + return (-1); + return (0); } void attr_copy(struct attr_flags *t, struct attr_flags *s) { + struct attr *os; /* * first copy the full struct, then replace the path and tags with * a own copy. */ memcpy(t, s, sizeof(struct attr_flags)); - /* XXX we could speed that a bit with a direct malloc, memcpy */ t->aspath = aspath_create(s->aspath->data, s->aspath->hdr.len); - t->astags = NULL; /* XXX NOT YET */ + TAILQ_INIT(&t->others); + TAILQ_FOREACH(os, &s->others, attr_l) + attr_optadd(t, os->flags, os->type, os->data, os->len); } -u_int16_t -attr_length(struct attr_flags *attr) +int +attr_write(void *p, u_int16_t p_len, u_int8_t flags, u_int8_t type, + void *data, u_int16_t data_len) { - u_int16_t alen, plen; - - alen = 4 /* origin */ + 7 /* nexthop */ + 7 /* lpref */; - plen = aspath_length(attr->aspath); - alen += 2 + plen + (plen > 255 ? 
2 : 1); - if (attr->med != 0) - alen += 7; - if (attr->aggr_atm == 1) - alen += 3; - if (attr->aggr_as != 0) - alen += 9; - - return alen; + u_char *b = p; + u_int16_t tmp, tot_len = 2; /* attribute header (without len) */ + + if (data_len > 255) { + tot_len += 2 + data_len; + flags |= ATTR_EXTLEN; + } else + tot_len += 1 + data_len; + + if (tot_len > p_len) + return (-1); + + *b++ = flags; + *b++ = type; + if (data_len > 255) { + tmp = htons(data_len); + memcpy(b, &tmp, 2); + b += 2; + } else + *b++ = (u_char)(data_len & 0xff); + + if (data_len != 0) + memcpy(b, data, data_len); + + return (tot_len); } -int -attr_dump(void *p, u_int16_t len, struct attr_flags *a) +void +attr_optadd(struct attr_flags *attr, u_int8_t flags, u_int8_t type, + u_char *data, u_int16_t len) { - u_char *buf = p; - u_int32_t tmp32; - u_int16_t tmp16; - u_int16_t aslen, wlen = 0; - -#define ATTR_WRITE(b, a, alen) \ - do { \ - if ((wlen + (alen)) > len) \ - return (-1); \ - memcpy((b) + wlen, (a), (alen)); \ - wlen += (alen); \ - } while (0) -#define ATTR_WRITEB(b, c) \ - do { \ - if (wlen == len || (c) > 0xff) \ - return (-1); \ - (b)[wlen++] = (c); \ - } while (0) - - /* origin */ - ATTR_WRITEB(buf, ATTR_ORIGIN_FLAGS); - ATTR_WRITEB(buf, ATTR_ORIGIN); - ATTR_WRITEB(buf, 1); - ATTR_WRITEB(buf, a->origin); - - /* aspath */ - aslen = aspath_length(a->aspath); - ATTR_WRITEB(buf, ATTR_TRANSITIVE | (aslen>255 ? 
ATTR_EXTLEN : 0)); - ATTR_WRITEB(buf, ATTR_ASPATH); - if (aslen > 255) { - tmp16 = htonl(aslen); - ATTR_WRITE(buf, &tmp16, 4); - } else - ATTR_WRITEB(buf, aslen); - ATTR_WRITE(buf, aspath_dump(a->aspath), aslen); - - /* nexthop */ - ATTR_WRITEB(buf, ATTR_NEXTHOP_FLAGS); - ATTR_WRITEB(buf, ATTR_NEXTHOP); - ATTR_WRITEB(buf, 4); - ATTR_WRITE(buf, &a->nexthop, 4); /* network byte order */ - - /* MED */ - if (a->med != 0) { - ATTR_WRITEB(buf, ATTR_MED_FLAGS); - ATTR_WRITEB(buf, ATTR_MED); - ATTR_WRITEB(buf, 4); - tmp32 = htonl(a->med); - ATTR_WRITE(buf, &tmp32, 4); - } + struct attr *a, *p; + + if (flags & ATTR_OPTIONAL && ! flags & ATTR_TRANSITIVE) + /* + * We already know that we're not intrested in this attribute. + * Currently only the MED is optional and non-transitive but + * MED is directly stored in struct attr_flags. + */ + return; - /* local preference */ - ATTR_WRITEB(buf, ATTR_LOCALPREF_FLAGS); - ATTR_WRITEB(buf, ATTR_LOCALPREF); - ATTR_WRITEB(buf, 4); - tmp32 = htonl(a->lpref); - ATTR_WRITE(buf, &tmp32, 4); - - /* atomic aggregate */ - if (a->aggr_atm == 1) { - ATTR_WRITEB(buf, ATTR_ATOMIC_AGGREGATE_FLAGS); - ATTR_WRITEB(buf, ATTR_ATOMIC_AGGREGATE); - ATTR_WRITEB(buf, 0); + a = calloc(1, sizeof(struct attr)); + if (a == NULL) + fatal("attr_optadd"); + a->flags = flags; + a->type = type; + a->len = len; + if (len != 0) { + a->data = malloc(len); + if (a->data == NULL) + fatal("attr_optadd"); + memcpy(a->data, data, len); } - - /* aggregator */ - if (a->aggr_as != 0) { - ATTR_WRITEB(buf, ATTR_AGGREGATOR_FLAGS); - ATTR_WRITEB(buf, ATTR_AGGREGATOR); - ATTR_WRITEB(buf, 6); - tmp16 = htons(a->aggr_as); - ATTR_WRITE(buf, &tmp16, 2); - ATTR_WRITE(buf, &a->aggr_ip, 4); /* network byte order */ + /* keep a sorted list */ + TAILQ_FOREACH_REVERSE(p, &attr->others, attr_l, attr_list) { + if (type > p->type) { + TAILQ_INSERT_AFTER(&attr->others, p, a, attr_l); + return; + } + ENSURE(type != p->type); } +} + +void +attr_optfree(struct attr_flags *attr) +{ + struct attr 
*a, *xa; - return wlen; -#undef ATTR_WRITEB -#undef ATTR_WRITE + for (a = TAILQ_FIRST(&attr->others); a != TAILQ_END(&attr->others); + a = xa) { + xa = TAILQ_NEXT(a, attr_l); + free(a->data); + free(a); + } } /* aspath specific functions */ @@ -367,11 +385,19 @@ aspath_hash(struct aspath *aspath) } int -aspath_equal(struct aspath *a1, struct aspath *a2) +aspath_compare(struct aspath *a1, struct aspath *a2) { - if (a1->hdr.len == a2->hdr.len && - memcmp(a1->data, a2->data, a1->hdr.len) == 0) - return 1; + int r; + + if (a1->hdr.len > a2->hdr.len) + return (1); + if (a1->hdr.len < a2->hdr.len) + return (-1); + r = memcmp(a1->data, a2->data, a1->hdr.len); + if (r > 0) + return (1); + if (r < 0) + return (-1); return 0; } @@ -418,10 +444,15 @@ path_update(struct rde_peer *peer, struct attr_flags *attrs, RIB_STAT(path_update); if ((asp = path_get(attrs->aspath, peer)) == NULL) { + /* path not available */ asp = path_add(peer, attrs); pte = prefix_add(asp, prefix, prefixlen); } else { - if (attr_equal(&asp->flags, attrs) == 0) { + if (attr_compare(&asp->flags, attrs) == 0) + /* path are equal, just add prefix */ + pte = prefix_add(asp, prefix, prefixlen); + else { + /* non equal path attributes create new path */ if ((p = prefix_get(asp, prefix, prefixlen)) == NULL) { asp = path_add(peer, attrs); @@ -430,8 +461,7 @@ path_update(struct rde_peer *peer, struct attr_flags *attrs, asp = path_add(peer, attrs); pte = prefix_move(asp, p); } - } else - pte = prefix_add(asp, prefix, prefixlen); + } } } @@ -447,7 +477,7 @@ path_get(struct aspath *aspath, struct rde_peer *peer) ENSURE(head != NULL); LIST_FOREACH(asp, head, path_l) { - if (aspath_equal(asp->flags.aspath, aspath) && + if (aspath_compare(asp->flags.aspath, aspath) == 0 && peer == asp->peer) return asp; } @@ -553,14 +583,10 @@ path_unlink(struct rde_aspath *asp) asp->peer = NULL; asp->nexthop = NULL; - /* free the aspath and astags */ + /* free the aspath and all other path attributes */ 
aspath_destroy(asp->flags.aspath); asp->flags.aspath = NULL; - - /* - * astags_destroy(asp->flags.astags); - * asp->flags.astags = NULL; - */ + attr_optfree(&asp->flags); } /* alloc and initialize new entry. May not fail. */ @@ -585,7 +611,7 @@ path_free(struct rde_aspath *asp) RIB_STAT(path_free); ENSURE(asp->peer == NULL && asp->flags.aspath == NULL && - asp->flags.astags == NULL); + TAILQ_EMPTY(&asp->flags.others)); free(asp); } |