summary refs log tree commit diff
path: root/usr.sbin/bgpd/rde_decide.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr.sbin/bgpd/rde_decide.c')
-rw-r--r-- usr.sbin/bgpd/rde_decide.c 415
1 files changed, 413 insertions, 2 deletions
diff --git a/usr.sbin/bgpd/rde_decide.c b/usr.sbin/bgpd/rde_decide.c
index 77adf20beb9..4e66e58515b 100644
--- a/usr.sbin/bgpd/rde_decide.c
+++ b/usr.sbin/bgpd/rde_decide.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: rde_decide.c,v 1.11 2004/01/07 00:01:17 claudio Exp $ */
+/* $OpenBSD: rde_decide.c,v 1.12 2004/01/10 16:20:29 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org>
@@ -20,9 +20,19 @@
#include <sys/types.h>
#include <sys/queue.h>
+#include <stdlib.h>
+#include <string.h>
+
#include "bgpd.h"
#include "ensure.h"
#include "rde.h"
+#include "session.h"
+
+/* Forward declarations for functions defined further down in this file. */
+int prefix_cmp(struct prefix *, struct prefix *);
+void up_generate_updates(struct prefix *, struct prefix *);
+int up_generate_attr(struct rde_peer *, struct update_attr *,
+ struct attr_flags *);
+int up_set_prefix(u_char *, int, struct in_addr, u_int8_t);
/*
* Decision Engine RFC implementation:
@@ -106,7 +116,7 @@
* than the prefix p2. p1 should be used for the new prefix and p2 for a
* already added prefix.
*/
-static int
+int
prefix_cmp(struct prefix *p1, struct prefix *p2)
{
struct rde_aspath *asp1, *asp2;
@@ -216,6 +226,7 @@ prefix_evaluate(struct prefix *p, struct pt_entry *pte)
* has an unreachable nexthop
*/
+ up_generate_updates(xp, pte->active);
rde_send_kroute(xp, pte->active);
if (xp == NULL || xp->aspath->nexthop == NULL ||
@@ -229,3 +240,403 @@ prefix_evaluate(struct prefix *p, struct pt_entry *pte)
}
}
}
+
+
+/* update stuff. */
+/*
+ * One prefix queued for transmission to a peer. It is linked on exactly one
+ * prefix list (the peer's withdraw list or the list of an update_attr) and
+ * additionally indexed in the peer's up_prefix tree.
+ */
+struct update_prefix {
+ /* address and length (in bits) of the queued prefix */
+ struct in_addr prefix;
+ int prefixlen;
+ /* head of the list this entry is currently linked on, set by up_add() */
+ struct uplist_prefix *prefix_h;
+ /* linkage on the list referenced by prefix_h */
+ TAILQ_ENTRY(update_prefix) prefix_l;
+ /* linkage in the per-peer uptree_prefix tree */
+ RB_ENTRY(update_prefix) entry;
+};
+
+/*
+ * One encoded set of path attributes queued for a peer, together with the
+ * prefixes that are to be announced with it.
+ */
+struct update_attr {
+ /* first comparison key of up_attr_cmp(); filled from aspath_hash() */
+ u_long attr_hash;
+ /* malloc'ed copy of the encoded attributes and its length in bytes */
+ u_char *attr;
+ u_int16_t attr_len;
+ /* prefixes queued for announcement with these attributes */
+ struct uplist_prefix prefix_h;
+ /* linkage on the peer's updates queue */
+ TAILQ_ENTRY(update_attr) attr_l;
+ /* linkage in the per-peer uptree_attr tree */
+ RB_ENTRY(update_attr) entry;
+};
+
+/* comparison callbacks for the two trees below and the queueing helper */
+int up_prefix_cmp(struct update_prefix *, struct update_prefix *);
+int up_attr_cmp(struct update_attr *, struct update_attr *);
+int up_add(struct rde_peer *, struct update_prefix *, struct update_attr *);
+
+/* tree of queued prefixes, ordered by up_prefix_cmp() */
+RB_PROTOTYPE(uptree_prefix, update_prefix, entry, up_prefix_cmp);
+RB_GENERATE(uptree_prefix, update_prefix, entry, up_prefix_cmp);
+
+/* tree of queued attribute sets, ordered by up_attr_cmp() */
+RB_PROTOTYPE(uptree_attr, update_attr, entry, up_attr_cmp);
+RB_GENERATE(uptree_attr, update_attr, entry, up_attr_cmp);
+
+/*
+ * Put the update queues, lookup trees and counters of a peer into the
+ * empty state.
+ */
+void
+up_init(struct rde_peer *peer)
+{
+	/* nothing is pending, so every counter starts at zero */
+	peer->up_wcnt = 0;
+	peer->up_nlricnt = 0;
+	peer->up_acnt = 0;
+	peer->up_pcnt = 0;
+
+	/* lookup trees */
+	RB_INIT(&peer->up_attrs);
+	RB_INIT(&peer->up_prefix);
+
+	/* transmit queues */
+	TAILQ_INIT(&peer->withdraws);
+	TAILQ_INIT(&peer->updates);
+}
+
+/*
+ * Discard everything that is still queued for a peer and reset its update
+ * state. All queued prefixes and attribute sets, including their encoded
+ * attribute buffers, are freed.
+ */
+void
+up_down(struct rde_peer *peer)
+{
+	struct update_attr	*ua, *xua;
+	struct update_prefix	*up, *xup;
+
+	for (ua = TAILQ_FIRST(&peer->updates); ua != TAILQ_END(&peer->updates);
+	    ua = xua) {
+		/* fetch the successor first, ua is released below */
+		xua = TAILQ_NEXT(ua, attr_l);
+		for (up = TAILQ_FIRST(&ua->prefix_h);
+		    up != TAILQ_END(&ua->prefix_h); up = xup) {
+			xup = TAILQ_NEXT(up, prefix_l);
+			free(up);
+		}
+		/* the encoded attributes are a separate allocation */
+		free(ua->attr);
+		free(ua);
+	}
+
+	for (up = TAILQ_FIRST(&peer->withdraws);
+	    up != TAILQ_END(&peer->withdraws); up = xup) {
+		xup = TAILQ_NEXT(up, prefix_l);
+		free(up);
+	}
+
+	/*
+	 * Every element is gone; reinitialise the list heads, the trees and
+	 * the counters instead of unlinking the entries one by one.
+	 */
+	up_init(peer);
+}
+
+/*
+ * Ordering function for the uptree_prefix tree: compare the raw s_addr
+ * value first and the prefix length second. Returns -1, 0 or 1.
+ */
+int
+up_prefix_cmp(struct update_prefix *a, struct update_prefix *b)
+{
+	if (a->prefix.s_addr != b->prefix.s_addr)
+		return (a->prefix.s_addr < b->prefix.s_addr ? -1 : 1);
+	if (a->prefixlen != b->prefixlen)
+		return (a->prefixlen < b->prefixlen ? -1 : 1);
+	return (0);
+}
+
+/*
+ * Ordering function for the uptree_attr tree. The hash is the primary key,
+ * the encoded length the secondary one; only when both match are the
+ * encoded attribute bytes compared.
+ */
+int
+up_attr_cmp(struct update_attr *a, struct update_attr *b)
+{
+	if (a->attr_hash != b->attr_hash)
+		return (a->attr_hash < b->attr_hash ? -1 : 1);
+	if (a->attr_len != b->attr_len)
+		return (a->attr_len < b->attr_len ? -1 : 1);
+	/* same hash and same length: fall back to a bytewise compare */
+	return memcmp(a->attr, b->attr, a->attr_len);
+}
+
+/*
+ * Queue prefix p for peer. With a == NULL the prefix is queued as a
+ * withdraw, otherwise it is queued for announcement with attribute set a.
+ *
+ * Both p and a are consumed by this function in every case: they are either
+ * linked into the peer's queues or freed (when an equal entry is already
+ * queued, or on failure). The caller must not touch them afterwards.
+ *
+ * Returns 0 on success and -1 if an entry could not be inserted.
+ */
+int
+up_add(struct rde_peer *peer, struct update_prefix *p, struct update_attr *a)
+{
+	struct update_attr	*na;
+	struct update_prefix	*np;
+
+	ENSURE(p != NULL);
+
+	/* 1. search for attr */
+	if (a != NULL && (na = RB_FIND(uptree_attr, &peer->up_attrs, a)) ==
+	    NULL) {
+		/* 1.1 if not found -> add */
+		TAILQ_INIT(&a->prefix_h);
+		if (RB_INSERT(uptree_attr, &peer->up_attrs, a) != NULL) {
+			logit(LOG_CRIT, "uptree_attr insert failed");
+			/* neither a nor p is linked anywhere, release both */
+			free(a->attr);
+			free(a);
+			free(p);
+			return (-1);
+		}
+		TAILQ_INSERT_TAIL(&peer->updates, a, attr_l);
+		peer->up_acnt++;
+	} else {
+		/* 1.2 if found -> use that, free a */
+		if (a != NULL) {
+			/* the duplicate owns its own attribute buffer */
+			free(a->attr);
+			free(a);
+			a = na;
+			/* move to end of update queue */
+			TAILQ_REMOVE(&peer->updates, a, attr_l);
+			TAILQ_INSERT_TAIL(&peer->updates, a, attr_l);
+		}
+	}
+
+	/* 2. search for prefix */
+	if ((np = RB_FIND(uptree_prefix, &peer->up_prefix, p)) == NULL) {
+		/* 2.1 if not found -> add */
+		if (RB_INSERT(uptree_prefix, &peer->up_prefix, p) != NULL) {
+			logit(LOG_CRIT, "uptree_prefix insert failed");
+			/*
+			 * Only p is released here: a is either NULL or
+			 * already linked into the peer's update queue.
+			 */
+			free(p);
+			return (-1);
+		}
+		peer->up_pcnt++;
+	} else {
+		/* 2.2 if found -> use that and free p */
+		TAILQ_REMOVE(np->prefix_h, np, prefix_l);
+		free(p);
+		p = np;
+		/* undo the accounting of the list the entry was taken from */
+		if (p->prefix_h == &peer->withdraws)
+			peer->up_wcnt--;
+		else
+			peer->up_nlricnt--;
+	}
+	/* 3. link prefix to attr */
+	if (a == NULL) {
+		TAILQ_INSERT_TAIL(&peer->withdraws, p, prefix_l);
+		p->prefix_h = &peer->withdraws;
+		peer->up_wcnt++;
+	} else {
+		TAILQ_INSERT_TAIL(&a->prefix_h, p, prefix_l);
+		p->prefix_h = &a->prefix_h;
+		peer->up_nlricnt++;
+	}
+	return (0);
+}
+
+/*
+ * Queue the result of a decision process run for every peer in state
+ * PEER_UP. new is the prefix that is now active, old the one that was
+ * active before. A prefix counts as usable only if it is non-NULL and its
+ * nexthop is in state NEXTHOP_REACH. If new is usable an update is queued,
+ * otherwise a withdraw for old. If neither is usable nothing is done.
+ */
+void
+up_generate_updates(struct prefix *new, struct prefix *old)
+{
+	extern struct rde_peer_head	 peerlist;
+	struct rde_peer			*peer;
+	struct update_attr		*a;
+	struct update_prefix		*p;
+	int				 withdraw;
+
+	if ((old == NULL || old->aspath->nexthop == NULL ||
+	    old->aspath->nexthop->state != NEXTHOP_REACH) &&
+	    (new == NULL || new->aspath->nexthop == NULL ||
+	    new->aspath->nexthop->state != NEXTHOP_REACH))
+		return;
+
+	/*
+	 * The same for every peer, so decide once. If new is not usable the
+	 * check above guarantees that old is non-NULL.
+	 */
+	withdraw = (new == NULL || new->aspath->nexthop == NULL ||
+	    new->aspath->nexthop->state != NEXTHOP_REACH);
+
+	LIST_FOREACH(peer, &peerlist, peer_l) {
+		if (peer->state != PEER_UP)
+			continue;
+		/*
+		 * Filtering should be hooked up here.
+		 * With filtering the decision if withdraw, update or nothing
+		 * needs to be done on a per peer basis -- actually per filter
+		 * set.
+		 */
+
+		p = calloc(1, sizeof(struct update_prefix));
+		if (p == NULL)
+			fatal("up_queue_update");
+
+		if (withdraw) {
+			/* withdraw prefix */
+			p->prefix = old->prefix->prefix;
+			p->prefixlen = old->prefix->prefixlen;
+			if (up_add(peer, p, NULL) == -1)
+				logit(LOG_CRIT, "queuing update failed.");
+			continue;
+		}
+
+		/* generate update */
+		a = calloc(1, sizeof(struct update_attr));
+		if (a == NULL)
+			fatal("up_queue_update");
+
+		if (up_generate_attr(peer, a, &new->aspath->flags) == -1) {
+			logit(LOG_CRIT,
+			    "generation of bgp path attributes failed");
+			/*
+			 * Do not queue an update without attributes; drop
+			 * what was allocated for this peer. free(NULL) is a
+			 * no-op if no attribute buffer was attached.
+			 */
+			free(a->attr);
+			free(a);
+			free(p);
+			continue;
+		}
+
+		/*
+		 * use aspath_hash as attr_hash, this may be suboptimal
+		 * but currently I don't care.
+		 */
+		a->attr_hash = aspath_hash(new->aspath->flags.aspath);
+		p->prefix = new->prefix->prefix;
+		p->prefixlen = new->prefix->prefixlen;
+
+		if (up_add(peer, p, a) == -1)
+			logit(LOG_CRIT, "queuing update failed.");
+	}
+}
+
+/* scratch buffer in which up_generate_attr() assembles the attributes */
+u_char	up_attr_buf[4096];
+
+/*
+ * Encode the path attributes described by a for transmission to peer.
+ * The attributes are assembled in up_attr_buf and then copied into a
+ * freshly malloc'ed buffer that is stored, together with its length, in
+ * upa->attr and upa->attr_len.
+ *
+ * Returns the encoded length in bytes, or -1 if attr_write() failed; in
+ * that case upa is left untouched.
+ */
+int
+up_generate_attr(struct rde_peer *peer, struct update_attr *upa,
+    struct attr_flags *a)
+{
+	struct attr	*oa;
+	u_int32_t	 tmp32;
+	int		 r;
+	u_int16_t	 aslen, len = sizeof(up_attr_buf), wlen = 0;
+
+	/* origin */
+	if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
+	    ATTR_ORIGIN, &a->origin, 1)) == -1)
+		return (-1);
+	wlen += r; len -= r;
+
+	/* aspath */
+	/* XXX XXX aspath prepends */
+	aslen = aspath_length(a->aspath);
+	if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
+	    ATTR_ASPATH, aspath_dump(a->aspath), aslen)) == -1)
+		return (-1);
+	wlen += r; len -= r;
+
+	/* nexthop, already network byte order */
+	/* XXX XXX nexthop fixup */
+	if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
+	    ATTR_NEXTHOP, &a->nexthop, 4)) == -1)
+		return (-1);
+	wlen += r; len -= r;
+
+	/*
+	 * The MED of other peers MUST not be announced to others.
+	 * Currently we just dump it. Possibilities are setting the MED via
+	 * a filter or set it to local-pref. struct attr_flags probably needs
+	 * a med_in and a med_out field.
+	 */
+
+	if (peer->conf.ebgp == 0) {
+		/* local preference, only valid for ibgp */
+		tmp32 = htonl(a->lpref);
+		if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
+		    ATTR_LOCALPREF, &tmp32, 4)) == -1)
+			return (-1);
+		wlen += r; len -= r;
+	}
+
+	/*
+	 * dump all other path attributes. Following rules apply:
+	 *  1. well-known attrs: ATTR_ATOMIC_AGGREGATE and ATTR_AGGREGATOR
+	 *     pass unmodified (enforce flags to correct values)
+	 *  2. non-transitive attrs: don't re-announce
+	 *  3. transitive known attrs: announce unmodified
+	 *  4. transitive unknown attrs: set partial bit and re-announce
+	 */
+	TAILQ_FOREACH(oa, &a->others, attr_l) {
+		/*
+		 * A skipped attribute writes nothing. Without this reset the
+		 * length of the previously written attribute would be
+		 * accounted a second time below.
+		 */
+		r = 0;
+		switch (oa->type) {
+		case ATTR_ATOMIC_AGGREGATE:
+			if ((r = attr_write(up_attr_buf + wlen, len,
+			    ATTR_WELL_KNOWN, ATTR_ATOMIC_AGGREGATE,
+			    NULL, 0)) == -1)
+				return (-1);
+			break;
+		case ATTR_AGGREGATOR:
+			if ((r = attr_write(up_attr_buf + wlen, len,
+			    ATTR_OPTIONAL | ATTR_TRANSITIVE, ATTR_AGGREGATOR,
+			    oa->data, oa->len)) == -1)
+				return (-1);
+			break;
+		/*
+		 * currently there are no non-transitive or transitive known
+		 * attributes.
+		 */
+		default:
+			/* unknown attribute */
+			if ((oa->flags & (ATTR_OPTIONAL | ATTR_TRANSITIVE)) !=
+			    (ATTR_OPTIONAL | ATTR_TRANSITIVE))
+				/*
+				 * somehow a non-transitive slipped through;
+				 * only optional transitive attributes may be
+				 * passed on (rule 2 and 4).
+				 */
+				break;
+			if ((r = attr_write(up_attr_buf + wlen, len,
+			    oa->flags | ATTR_PARTIAL, oa->type,
+			    oa->data, oa->len)) == -1)
+				return (-1);
+			break;
+		}
+		wlen += r; len -= r;
+	}
+
+	/* the bgp path attributes are now stored in the global buf */
+	upa->attr = malloc(wlen);
+	if (upa->attr == NULL)
+		fatal("up_generate_attr");
+	memcpy(upa->attr, up_attr_buf, wlen);
+	upa->attr_len = wlen;
+	return (wlen);
+}
+
+/*
+ * Write a prefix into buf: one byte holding the prefix length in bits,
+ * followed by the (plen + 7) / 8 leading bytes of the address.
+ *
+ * Returns the number of bytes written, or -1 if they do not fit into the
+ * len bytes available.
+ */
+int
+up_set_prefix(u_char *buf, int len, struct in_addr prefix, u_int8_t plen)
+{
+	int	addrbytes;
+
+	ENSURE(plen <= 32);
+	/* number of address bytes needed to hold plen bits */
+	addrbytes = (plen + 7) / 8;
+
+	/* one extra byte for the length field itself */
+	if (addrbytes + 1 > len)
+		return (-1);
+	buf[0] = plen;
+	memcpy(buf + 1, &prefix.s_addr, addrbytes);
+	return (addrbytes + 1);
+}
+
+/*
+ * Write as many prefixes from prefix_head into buf as fit into len bytes.
+ * Every prefix that was written is removed from the peer's prefix tree and
+ * from its list, the peer's counters are adjusted and the entry is freed.
+ *
+ * Returns the number of bytes written to buf.
+ */
+int
+up_dump_prefix(u_char *buf, int len, struct uplist_prefix *prefix_head,
+    struct rde_peer *peer)
+{
+	struct update_prefix	*cur, *next;
+	int			 n, used = 0;
+
+	cur = TAILQ_FIRST(prefix_head);
+	while (cur != TAILQ_END(prefix_head)) {
+		/* remember the successor, cur is freed at the end */
+		next = TAILQ_NEXT(cur, prefix_l);
+
+		n = up_set_prefix(buf + used, len - used, cur->prefix,
+		    cur->prefixlen);
+		if (n == -1)
+			/* buffer is full, the rest stays queued */
+			break;
+		used += n;
+
+		if (RB_REMOVE(uptree_prefix, &peer->up_prefix, cur) == NULL)
+			logit(LOG_CRIT, "dequeuing update failed.");
+		TAILQ_REMOVE(cur->prefix_h, cur, prefix_l);
+		peer->up_pcnt--;
+		if (cur->prefix_h != &peer->withdraws)
+			peer->up_nlricnt--;
+		else
+			peer->up_wcnt--;
+		free(cur);
+
+		cur = next;
+	}
+	return (used);
+}
+
+/*
+ * Write the first queued attribute set of peer into buf: a two byte
+ * attribute length in network byte order, the encoded attributes and then
+ * as many of the attached prefixes as fit into the remaining space.
+ * Once all prefixes of the attribute set have been written, the set is
+ * dequeued and freed together with its attribute buffer.
+ *
+ * Returns the number of bytes written, or 0 if nothing is queued or the
+ * buffer is too small.
+ */
+int
+up_dump_attrnlri(u_char *buf, int len, struct rde_peer *peer)
+{
+	struct update_attr	*upa;
+	int			 r, wpos;
+	u_int16_t		 attr_len;
+
+	upa = TAILQ_FIRST(&peer->updates);
+	if (upa == NULL || upa->attr_len + 5 > len)
+		/* either no packet or not enough space */
+		return (0);
+
+	/* first dump the attributes */
+	attr_len = htons(upa->attr_len);
+	memcpy(buf, &attr_len, 2);
+	wpos = 2;
+	memcpy(buf + wpos, upa->attr, upa->attr_len);
+	wpos += upa->attr_len;
+
+	/* now dump the nlri */
+	r = up_dump_prefix(buf + wpos, len - wpos, &upa->prefix_h, peer);
+	wpos += r;
+
+	/* now check if all prefixes were written */
+	if (TAILQ_EMPTY(&upa->prefix_h)) {
+		if (RB_REMOVE(uptree_attr, &peer->up_attrs, upa) == NULL)
+			logit(LOG_CRIT, "dequeuing update failed.");
+		TAILQ_REMOVE(&peer->updates, upa, attr_l);
+		/* the attribute buffer is a separate allocation */
+		free(upa->attr);
+		free(upa);
+		peer->up_acnt--;
+	}
+
+	return (wpos);
+}
+