diff options
Diffstat (limited to 'usr.sbin/bgpd/rde_decide.c')
-rw-r--r-- | usr.sbin/bgpd/rde_decide.c | 415 |
1 files changed, 413 insertions, 2 deletions
diff --git a/usr.sbin/bgpd/rde_decide.c b/usr.sbin/bgpd/rde_decide.c index 77adf20beb9..4e66e58515b 100644 --- a/usr.sbin/bgpd/rde_decide.c +++ b/usr.sbin/bgpd/rde_decide.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_decide.c,v 1.11 2004/01/07 00:01:17 claudio Exp $ */ +/* $OpenBSD: rde_decide.c,v 1.12 2004/01/10 16:20:29 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -20,9 +20,19 @@ #include <sys/types.h> #include <sys/queue.h> +#include <stdlib.h> +#include <string.h> + #include "bgpd.h" #include "ensure.h" #include "rde.h" +#include "session.h" + +int prefix_cmp(struct prefix *, struct prefix *); +void up_generate_updates(struct prefix *, struct prefix *); +int up_generate_attr(struct rde_peer *, struct update_attr *, + struct attr_flags *); +int up_set_prefix(u_char *, int, struct in_addr, u_int8_t); /* * Decision Engine RFC implementation: @@ -106,7 +116,7 @@ * than the prefix p2. p1 should be used for the new prefix and p2 for a * already added prefix. */ -static int +int prefix_cmp(struct prefix *p1, struct prefix *p2) { struct rde_aspath *asp1, *asp2; @@ -216,6 +226,7 @@ prefix_evaluate(struct prefix *p, struct pt_entry *pte) * has an unreachable nexthop */ + up_generate_updates(xp, pte->active); rde_send_kroute(xp, pte->active); if (xp == NULL || xp->aspath->nexthop == NULL || @@ -229,3 +240,403 @@ prefix_evaluate(struct prefix *p, struct pt_entry *pte) } } } + + +/* update stuff. */ +struct update_prefix { + struct in_addr prefix; + int prefixlen; + struct uplist_prefix *prefix_h; + TAILQ_ENTRY(update_prefix) prefix_l; + RB_ENTRY(update_prefix) entry; +}; + +struct update_attr { + u_long attr_hash; + u_char *attr; + u_int16_t attr_len; + struct uplist_prefix prefix_h; + TAILQ_ENTRY(update_attr) attr_l; + RB_ENTRY(update_attr) entry; +}; + +int up_prefix_cmp(struct update_prefix *, struct update_prefix *); +int up_attr_cmp(struct update_attr *, struct update_attr *); +int up_add(struct rde_peer *, struct update_prefix *, struct update_attr *); + +RB_PROTOTYPE(uptree_prefix, update_prefix, entry, up_prefix_cmp); +RB_GENERATE(uptree_prefix, update_prefix, entry, up_prefix_cmp); + +RB_PROTOTYPE(uptree_attr, update_attr, entry, up_attr_cmp); +RB_GENERATE(uptree_attr, update_attr, entry, up_attr_cmp); + +void +up_init(struct rde_peer *peer) +{ + TAILQ_INIT(&peer->updates); + TAILQ_INIT(&peer->withdraws); + RB_INIT(&peer->up_prefix); + RB_INIT(&peer->up_attrs); + peer->up_pcnt = 0; + peer->up_acnt = 0; + peer->up_nlricnt = 0; + peer->up_wcnt = 0; +} + +void +up_down(struct rde_peer *peer) +{ + struct update_attr *ua, *xua; + struct update_prefix *up, *xup; + + for (ua = TAILQ_FIRST(&peer->updates); ua != TAILQ_END(&peer->updates); + ua = xua) { + xua = TAILQ_NEXT(ua, attr_l); + for (up = TAILQ_FIRST(&ua->prefix_h); + up != TAILQ_END(&ua->prefix_h); up = xup) { + xup = TAILQ_NEXT(up, prefix_l); + free(up); + } + free(ua); + } + + for (up = TAILQ_FIRST(&peer->withdraws); + up != TAILQ_END(&peer->withdraws); up = xup) { + xup = TAILQ_NEXT(up, prefix_l); + free(up); + } + + TAILQ_INIT(&peer->updates); + TAILQ_INIT(&peer->withdraws); + RB_INIT(&peer->up_prefix); + RB_INIT(&peer->up_attrs); + + peer->up_pcnt = 0; + peer->up_acnt = 0; + peer->up_nlricnt = 0; + peer->up_wcnt = 0; +} + +int +up_prefix_cmp(struct update_prefix *a, struct update_prefix *b) +{ + if (a->prefix.s_addr < b->prefix.s_addr) + return (-1); + if (a->prefix.s_addr > b->prefix.s_addr) + return (1); + if (a->prefixlen < b->prefixlen) + return (-1); + if (a->prefixlen > b->prefixlen) + return (1); + return (0); +} + +int +up_attr_cmp(struct update_attr *a, struct update_attr *b) +{ + if (a->attr_hash < b->attr_hash) + return (-1); + if (a->attr_hash > b->attr_hash) + return (1); + if (a->attr_len < b->attr_len) + return (-1); + if (a->attr_len > b->attr_len) + return (1); + return memcmp(a->attr, b->attr, a->attr_len); +} + +int +up_add(struct rde_peer *peer, struct update_prefix *p, struct update_attr *a) +{ + struct update_attr *na; + struct update_prefix *np; + + ENSURE(p != NULL); + + /* 1. search for attr */ + if (a != NULL && (na = RB_FIND(uptree_attr, &peer->up_attrs, a)) == + NULL) { + /* 1.1 if not found -> add */ + TAILQ_INIT(&a->prefix_h); + if (RB_INSERT(uptree_attr, &peer->up_attrs, a) != NULL) { + logit(LOG_CRIT, "uptree_attr insert failed"); + return (-1); + } + TAILQ_INSERT_TAIL(&peer->updates, a, attr_l); + peer->up_acnt++; + } else { + /* 1.2 if found -> use that, free a */ + if (a != NULL) { + free(a); + a = na; + /* move to end of update queue */ + TAILQ_REMOVE(&peer->updates, a, attr_l); + TAILQ_INSERT_TAIL(&peer->updates, a, attr_l); + } + } + + /* 2. search for prefix */ + if ((np = RB_FIND(uptree_prefix, &peer->up_prefix, p)) == NULL) { + /* 2.1 if not found -> add */ + if (RB_INSERT(uptree_prefix, &peer->up_prefix, p) != NULL) { + logit(LOG_CRIT, "uptree_prefix insert failed"); + return (-1); + } + peer->up_pcnt++; + } else { + /* 2.2 if found -> use that and free p */ + TAILQ_REMOVE(np->prefix_h, np, prefix_l); + free(p); + p = np; + if (p->prefix_h == &peer->withdraws) + peer->up_wcnt--; + else + peer->up_nlricnt--; + } + /* 3. link prefix to attr */ + if (a == NULL) { + TAILQ_INSERT_TAIL(&peer->withdraws, p, prefix_l); + p->prefix_h = &peer->withdraws; + peer->up_wcnt++; + } else { + TAILQ_INSERT_TAIL(&a->prefix_h, p, prefix_l); + p->prefix_h = &a->prefix_h; + peer->up_nlricnt++; + } + return (0); +} + +void +up_generate_updates(struct prefix *new, struct prefix *old) +{ + extern struct rde_peer_head peerlist; + struct rde_peer *peer; + struct update_attr *a; + struct update_prefix *p; + + if ((old == NULL || old->aspath->nexthop == NULL || + old->aspath->nexthop->state != NEXTHOP_REACH) && + (new == NULL || new->aspath->nexthop == NULL || + new->aspath->nexthop->state != NEXTHOP_REACH)) + return; + + LIST_FOREACH(peer, &peerlist, peer_l) { + if (peer->state != PEER_UP) + continue; + /* + * Filtering should be hooked up here. + * With filtering the decision if withdraw, update or nothing + * needs to be done on a per peer basis -- acctually per filter + * set. + */ + + p = calloc(1, sizeof(struct update_prefix)); + if (p == NULL) + fatal("up_queue_update"); + + if (new == NULL || new->aspath->nexthop == NULL || + new->aspath->nexthop->state != NEXTHOP_REACH) { + /* withdraw prefix */ + p->prefix = old->prefix->prefix; + p->prefixlen = old->prefix->prefixlen; + if (up_add(peer, p, NULL) == -1) + logit(LOG_CRIT, "queuing update failed."); + } else { + /* generate update */ + a = calloc(1, sizeof(struct update_attr)); + if (a == NULL) + fatal("up_queue_update"); + + if (up_generate_attr(peer, a, &new->aspath->flags) == + -1) + logit(LOG_CRIT, + "generation of bgp path attributes failed"); + + /* + * use aspath_hash as attr_hash, this may be unoptimal + * but currently I don't care. + */ + a->attr_hash = aspath_hash(new->aspath->flags.aspath); + p->prefix = new->prefix->prefix; + p->prefixlen = new->prefix->prefixlen; + + if (up_add(peer, p, a) == -1) + logit(LOG_CRIT, "queuing update failed."); + } + } +} + +u_char up_attr_buf[4096]; + +int +up_generate_attr(struct rde_peer *peer, struct update_attr *upa, + struct attr_flags *a) +{ + struct attr *oa; + u_int32_t tmp32; + int r; + u_int16_t aslen, len = sizeof(up_attr_buf), wlen = 0; + + /* origin */ + if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN, + ATTR_ORIGIN, &a->origin, 1)) == -1) + return (-1); + wlen += r; len -= r; + + /* aspath */ + /* XXX XXX aspath prepends */ + aslen = aspath_length(a->aspath); + if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN, + ATTR_ASPATH, aspath_dump(a->aspath), aslen)) == -1) + return (-1); + wlen += r; len -= r; + + /* nexthop, already network byte order */ + /* XXX XXX nexthop fixup */ + if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN, + ATTR_NEXTHOP, &a->nexthop, 4)) == -1) + return (-1); + wlen += r; len -= r; + + /* + * The MED of other peers MUST not be announced to others. + * Currently we just dump it. Possibilities are setting the MED via + * a filter or set it to local-pref. struct attr_flags probably needs + * a med_in and a med_out field. + */ + + if (peer->conf.ebgp == 0) { + /* local preference, only valid for ibgp */ + tmp32 = htonl(a->lpref); + if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN, + ATTR_LOCALPREF, &tmp32, 4)) == -1) + return (-1); + wlen += r; len -= r; + } + + /* + * dump all other path attributes. Following rules apply: + * 1. well-known attrs: ATTR_ATOMIC_AGGREGATE and ATTR_AGGREGATOR + * pass unmodified (enforce flags to correct values) + * 2. non-transitive attrs: don't re-announce + * 3. transitive known attrs: announce unmodified + * 4. transitive unknown attrs: set partial bit and re-announce + */ + TAILQ_FOREACH(oa, &a->others, attr_l) { + switch (oa->type) { + case ATTR_ATOMIC_AGGREGATE: + if ((r = attr_write(up_attr_buf + wlen, len, + ATTR_WELL_KNOWN, ATTR_ATOMIC_AGGREGATE, + NULL, 0)) == -1) + return (-1); + break; + case ATTR_AGGREGATOR: + if ((r = attr_write(up_attr_buf + wlen, len, + ATTR_OPTIONAL | ATTR_TRANSITIVE, ATTR_AGGREGATOR, + oa->data, oa->len)) == -1) + return (-1); + break; + /* + * currently there are no non-transitive or transitive known + * attributes. + */ + default: + /* unknown attribute */ + if (!(oa->flags & ATTR_OPTIONAL)) + /* somehow a non-transitive slipped through */ + break; + if ((r = attr_write(up_attr_buf + wlen, len, + oa->flags | ATTR_PARTIAL, oa->type, + oa->data, oa->len)) == -1) + return (-1); + break; + } + wlen += r; len -= r; + } + + /* the bgp path attributes are now stored in the global buf */ + upa->attr = malloc(wlen); + if (upa->attr == NULL) + fatal("up_generate_attr"); + memcpy(upa->attr, up_attr_buf, wlen); + upa->attr_len = wlen; + return (wlen); +} + +int +up_set_prefix(u_char *buf, int len, struct in_addr prefix, u_int8_t plen) +{ + int totlen; + + ENSURE(plen <= 32); + totlen = (plen + 7) / 8 + 1; + + if (totlen > len) + return (-1); + *buf++ = plen; + memcpy(buf, &prefix.s_addr, totlen - 1); + return (totlen); +} + +int +up_dump_prefix(u_char *buf, int len, struct uplist_prefix *prefix_head, + struct rde_peer *peer) +{ + struct update_prefix *upp, *xupp; + int r, wpos = 0; + + for (upp = TAILQ_FIRST(prefix_head); + upp != TAILQ_END(prefix_head); upp = xupp) { + xupp = TAILQ_NEXT(upp, prefix_l); + if ((r = up_set_prefix(buf + wpos, len - wpos, + upp->prefix, upp->prefixlen)) == -1) + break; + wpos += r; + if (RB_REMOVE(uptree_prefix, &peer->up_prefix, upp) == NULL) + logit(LOG_CRIT, "dequeuing update failed."); + TAILQ_REMOVE(upp->prefix_h, upp, prefix_l); + peer->up_pcnt--; + if (upp->prefix_h == &peer->withdraws) + peer->up_wcnt--; + else + peer->up_nlricnt--; + free(upp); + } + return (wpos); +} + +int +up_dump_attrnlri(u_char *buf, int len, struct rde_peer *peer) +{ + struct update_attr *upa; + int r, wpos; + u_int16_t attr_len; + + upa = TAILQ_FIRST(&peer->updates); + if (upa == NULL || upa->attr_len + 5 > len) + /* either no packet or not enough space */ + return (0); + + /* first dump the attributes */ + attr_len = htons(upa->attr_len); + memcpy(buf, &attr_len, 2); + wpos = 2; + memcpy(buf + wpos, upa->attr, upa->attr_len); + wpos += upa->attr_len; + + /* now dump the nlri */ + r = up_dump_prefix(buf + wpos, len - wpos, &upa->prefix_h, peer); + wpos += r; + + /* now check if all prefixes where written */ + if (TAILQ_EMPTY(&upa->prefix_h)) { + if (RB_REMOVE(uptree_attr, &peer->up_attrs, upa) == NULL) + logit(LOG_CRIT, "dequeuing update failed."); + TAILQ_REMOVE(&peer->updates, upa, attr_l); + free(upa); + peer->up_acnt--; + } + + return (wpos); +} + |