diff options
-rw-r--r-- | usr.sbin/bgpd/bgpd.h | 9 | ||||
-rw-r--r-- | usr.sbin/bgpd/mrt.c | 9 | ||||
-rw-r--r-- | usr.sbin/bgpd/mrt.h | 6 | ||||
-rw-r--r-- | usr.sbin/bgpd/name2id.c | 27 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde.c | 215 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde.h | 131 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde_decide.c | 60 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde_prefix.c | 163 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde_rib.c | 517 |
9 files changed, 652 insertions, 485 deletions
diff --git a/usr.sbin/bgpd/bgpd.h b/usr.sbin/bgpd/bgpd.h index 708ec8472e4..89613af46cf 100644 --- a/usr.sbin/bgpd/bgpd.h +++ b/usr.sbin/bgpd/bgpd.h @@ -1,4 +1,4 @@ -/* $OpenBSD: bgpd.h,v 1.230 2009/05/05 20:12:04 sthen Exp $ */ +/* $OpenBSD: bgpd.h,v 1.231 2009/05/17 12:25:15 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -521,7 +521,7 @@ struct ctl_show_rib { u_int32_t med; u_int32_t prefix_cnt; u_int32_t active_cnt; - u_int32_t adjrib_cnt; + u_int32_t rib_cnt; u_int16_t aspath_len; u_int16_t flags; u_int8_t prefixlen; @@ -708,6 +708,7 @@ struct rrefresh { struct rde_memstats { int64_t path_cnt; int64_t prefix_cnt; + int64_t rib_cnt; int64_t pt4_cnt; int64_t pt6_cnt; int64_t nexthop_cnt; @@ -820,6 +821,10 @@ int pftable_addr_remove(struct pftable_msg *); int pftable_commit(void); /* name2id.c */ +u_int16_t rib_name2id(const char *); +const char *rib_id2name(u_int16_t); +void rib_unref(u_int16_t); +void rib_ref(u_int16_t); u_int16_t rtlabel_name2id(const char *); const char *rtlabel_id2name(u_int16_t); void rtlabel_unref(u_int16_t); diff --git a/usr.sbin/bgpd/mrt.c b/usr.sbin/bgpd/mrt.c index a2dfe8ecd2b..837f3c60d79 100644 --- a/usr.sbin/bgpd/mrt.c +++ b/usr.sbin/bgpd/mrt.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mrt.c,v 1.59 2009/04/29 20:04:30 claudio Exp $ */ +/* $OpenBSD: mrt.c,v 1.60 2009/05/17 12:25:15 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -374,7 +374,7 @@ mrt_clear_seq(void) } void -mrt_dump_upcall(struct pt_entry *pt, void *ptr) +mrt_dump_upcall(struct rib_entry *re, void *ptr) { struct mrt *mrtbuf = ptr; struct prefix *p; @@ -384,10 +384,7 @@ mrt_dump_upcall(struct pt_entry *pt, void *ptr) * dumps the table so we do the same. If only the active route should * be dumped p should be set to p = pt->active. 
*/ - LIST_FOREACH(p, &pt->prefix_h, prefix_l) { - /* for now dump only stuff from the local-RIB */ - if (!(p->flags & F_LOCAL)) - continue; + LIST_FOREACH(p, &re->prefix_h, rib_l) { if (mrtbuf->type == MRT_TABLE_DUMP) mrt_dump_entry(mrtbuf, p, sequencenum++, p->aspath->peer); diff --git a/usr.sbin/bgpd/mrt.h b/usr.sbin/bgpd/mrt.h index 6a6bc805987..1ee1202455d 100644 --- a/usr.sbin/bgpd/mrt.h +++ b/usr.sbin/bgpd/mrt.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mrt.h,v 1.19 2009/03/19 07:00:07 claudio Exp $ */ +/* $OpenBSD: mrt.h,v 1.20 2009/05/17 12:25:15 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -292,7 +292,7 @@ struct mrt_config { struct peer; struct prefix; -struct pt_entry; +struct rib_entry; /* prototypes */ void mrt_dump_bgp_msg(struct mrt *, void *, u_int16_t, @@ -300,7 +300,7 @@ void mrt_dump_bgp_msg(struct mrt *, void *, u_int16_t, void mrt_dump_state(struct mrt *, u_int16_t, u_int16_t, struct peer *); void mrt_clear_seq(void); -void mrt_dump_upcall(struct pt_entry *, void *); +void mrt_dump_upcall(struct rib_entry *, void *); void mrt_write(struct mrt *); void mrt_clean(struct mrt *); void mrt_init(struct imsgbuf *, struct imsgbuf *); diff --git a/usr.sbin/bgpd/name2id.c b/usr.sbin/bgpd/name2id.c index a6c09640b7c..9e8774fd628 100644 --- a/usr.sbin/bgpd/name2id.c +++ b/usr.sbin/bgpd/name2id.c @@ -1,4 +1,4 @@ -/* $OpenBSD: name2id.c,v 1.7 2006/05/02 14:41:26 claudio Exp $ */ +/* $OpenBSD: name2id.c,v 1.8 2009/05/17 12:25:15 claudio Exp $ */ /* * Copyright (c) 2004, 2005 Henning Brauer <henning@openbsd.org> @@ -43,10 +43,35 @@ const char *_id2name(struct n2id_labels *, u_int16_t); void _unref(struct n2id_labels *, u_int16_t); void _ref(struct n2id_labels *, u_int16_t); +struct n2id_labels rib_labels = TAILQ_HEAD_INITIALIZER(rib_labels); struct n2id_labels rt_labels = TAILQ_HEAD_INITIALIZER(rt_labels); struct n2id_labels pftable_labels = TAILQ_HEAD_INITIALIZER(pftable_labels); u_int16_t +rib_name2id(const char *name) +{ + 
return (_name2id(&rib_labels, name)); +} + +const char * +rib_id2name(u_int16_t id) +{ + return (_id2name(&rib_labels, id)); +} + +void +rib_unref(u_int16_t id) +{ + _unref(&rib_labels, id); +} + +void +rib_ref(u_int16_t id) +{ + _ref(&rib_labels, id); +} + +u_int16_t rtlabel_name2id(const char *name) { return (_name2id(&rt_labels, name)); diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c index a3cc01b82e8..9f1f14ccb8f 100644 --- a/usr.sbin/bgpd/rde.c +++ b/usr.sbin/bgpd/rde.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.c,v 1.242 2009/05/11 19:16:21 claudio Exp $ */ +/* $OpenBSD: rde.c,v 1.243 2009/05/17 12:25:15 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -67,19 +67,20 @@ void rde_dump_filter(struct prefix *, struct ctl_show_rib_request *); void rde_dump_filterout(struct rde_peer *, struct prefix *, struct ctl_show_rib_request *); -void rde_dump_upcall(struct pt_entry *, void *); +void rde_dump_upcall(struct rib_entry *, void *); void rde_dump_as(struct ctl_show_rib_request *); -void rde_dump_prefix_upcall(struct pt_entry *, void *); +void rde_dump_prefix_upcall(struct rib_entry *, void *); void rde_dump_prefix(struct ctl_show_rib_request *); void rde_dump_community(struct ctl_show_rib_request *); void rde_dump_ctx_new(struct ctl_show_rib_request *, pid_t, enum imsg_type); +void rde_dump_done(void *); void rde_dump_runner(void); int rde_dump_pending(void); -void rde_up_dump_upcall(struct pt_entry *, void *); -void rde_softreconfig_out(struct pt_entry *, void *); -void rde_softreconfig_in(struct pt_entry *, void *); +void rde_up_dump_upcall(struct rib_entry *, void *); +void rde_softreconfig_out(struct rib_entry *, void *); +void rde_softreconfig_in(struct rib_entry *, void *); void rde_update_queue_runner(void); void rde_update6_queue_runner(void); @@ -96,7 +97,7 @@ void peer_send_eor(struct rde_peer *, u_int16_t, u_int16_t); void network_init(struct network_head *); void network_add(struct network_config *, int); void 
network_delete(struct network_config *, int); -void network_dump_upcall(struct pt_entry *, void *); +void network_dump_upcall(struct rib_entry *, void *); void rde_shutdown(void); int sa_cmp(struct bgpd_addr *, struct sockaddr *); @@ -115,7 +116,7 @@ struct rde_memstats rdemem; struct rde_dump_ctx { TAILQ_ENTRY(rde_dump_ctx) entry; - struct pt_context ptc; + struct rib_context ribctx; struct ctl_show_rib_request req; sa_family_t af; }; @@ -221,6 +222,7 @@ rde_main(struct bgpd_config *config, struct peer *peer_l, free(config->listen_addrs); pt_init(); + rib_init(); path_init(pathhashsize); aspath_init(pathhashsize); attr_init(attrhashsize); @@ -650,11 +652,14 @@ rde_dispatch_imsg_parent(struct imsgbuf *ibuf) } } /* sync local-RIB first */ + /* XXX this needs rework anyway */ if (reconf_in) - pt_dump(rde_softreconfig_in, NULL, AF_UNSPEC); + rib_dump(&ribs[0], rde_softreconfig_in, NULL, + AF_UNSPEC); /* then sync peers */ if (reconf_out) - pt_dump(rde_softreconfig_out, NULL, AF_UNSPEC); + rib_dump(&ribs[1], rde_softreconfig_out, NULL, + AF_UNSPEC); while ((r = TAILQ_FIRST(rules_l)) != NULL) { TAILQ_REMOVE(rules_l, r, entry); @@ -708,7 +713,7 @@ rde_dispatch_imsg_parent(struct imsgbuf *ibuf) free(mrt); mrt = xmrt; mrt_clear_seq(); - pt_dump(mrt_dump_upcall, mrt, + rib_dump(&ribs[1], mrt_dump_upcall, mrt, AF_UNSPEC); break; } @@ -854,8 +859,8 @@ rde_update_dispatch(struct imsg *imsg) peer->prefix_rcvd_withdraw++; rde_update_log("withdraw", peer, NULL, &prefix, prefixlen); - prefix_remove(peer, &prefix, prefixlen, F_LOCAL); - prefix_remove(peer, &prefix, prefixlen, F_ORIGINAL); + prefix_remove(&ribs[1], peer, &prefix, prefixlen, F_LOCAL); + prefix_remove(&ribs[0], peer, &prefix, prefixlen, F_ORIGINAL); } if (attrpath_len == 0) { @@ -913,10 +918,10 @@ rde_update_dispatch(struct imsg *imsg) peer->prefix_rcvd_withdraw++; rde_update_log("withdraw", peer, NULL, &prefix, prefixlen); - prefix_remove(peer, &prefix, prefixlen, - F_LOCAL); - prefix_remove(peer, &prefix, prefixlen, 
- F_ORIGINAL); + prefix_remove(&ribs[1], peer, &prefix, + prefixlen, F_LOCAL); + prefix_remove(&ribs[0], peer, &prefix, + prefixlen, F_ORIGINAL); } break; default: @@ -967,7 +972,7 @@ rde_update_dispatch(struct imsg *imsg) peer->prefix_rcvd_update++; /* add original path to the Adj-RIB-In */ if (peer->conf.softreconfig_in) - path_update(peer, asp, &prefix, prefixlen, F_ORIGINAL); + path_update(&ribs[0], peer, asp, &prefix, prefixlen); /* input filter */ if (rde_filter(&fasp, rules_l, peer, asp, &prefix, prefixlen, @@ -991,7 +996,7 @@ rde_update_dispatch(struct imsg *imsg) rde_update_log("update", peer, &fasp->nexthop->exit_nexthop, &prefix, prefixlen); - path_update(peer, fasp, &prefix, prefixlen, F_LOCAL); + path_update(&ribs[1], peer, fasp, &prefix, prefixlen); /* free modified aspath */ if (fasp != asp) @@ -1060,8 +1065,8 @@ rde_update_dispatch(struct imsg *imsg) peer->prefix_rcvd_update++; /* add original path to the Adj-RIB-In */ if (peer->conf.softreconfig_in) - path_update(peer, asp, &prefix, - prefixlen, F_ORIGINAL); + path_update(&ribs[0], peer, asp, + &prefix, prefixlen); /* input filter */ if (rde_filter(&fasp, rules_l, peer, asp, @@ -1088,8 +1093,8 @@ rde_update_dispatch(struct imsg *imsg) rde_update_log("update", peer, &asp->nexthop->exit_nexthop, &prefix, prefixlen); - path_update(peer, fasp, &prefix, prefixlen, - F_LOCAL); + path_update(&ribs[1], peer, fasp, &prefix, + prefixlen); /* free modified aspath */ if (fasp != asp) @@ -1713,7 +1718,7 @@ rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags) rib.med = asp->med; rib.prefix_cnt = asp->prefix_cnt; rib.active_cnt = asp->active_cnt; - rib.adjrib_cnt = asp->adjrib_cnt; + rib.rib_cnt = asp->rib_cnt; strlcpy(rib.descr, asp->peer->conf.descr, sizeof(rib.descr)); memcpy(&rib.remote_addr, &asp->peer->remote_addr, sizeof(rib.remote_addr)); @@ -1734,7 +1739,7 @@ rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags) rib.prefixlen = p->prefix->prefixlen; 
rib.origin = asp->origin; rib.flags = 0; - if (p->prefix->active == p) + if (p->rib->active == p) rib.flags |= F_RIB_ACTIVE; if (asp->peer->conf.ebgp == 0) rib.flags |= F_RIB_INTERNAL; @@ -1809,17 +1814,15 @@ rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req) { struct rde_peer *peer; - if ((req->flags & F_CTL_ADJ_IN && p->flags & F_ORIGINAL) || - (!(req->flags & (F_CTL_ADJ_IN|F_CTL_ADJ_OUT)) && - p->flags & F_LOCAL)) { + if (req->flags & F_CTL_ADJ_IN || + !(req->flags & (F_CTL_ADJ_IN|F_CTL_ADJ_OUT))) { if (req->peerid && req->peerid != p->aspath->peer->conf.id) return; rde_dump_rib_as(p, p->aspath, req->pid, req->flags); - } else if (req->flags & F_CTL_ADJ_OUT && p->flags & F_LOCAL) { - if (p->prefix->active != p) + } else if (req->flags & F_CTL_ADJ_OUT) { + if (p->rib->active != p) /* only consider active prefix */ return; - if (req->peerid) { if ((peer = peer_get(req->peerid)) != NULL) rde_dump_filterout(peer, p, req); @@ -1829,13 +1832,13 @@ rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req) } void -rde_dump_upcall(struct pt_entry *pt, void *ptr) +rde_dump_upcall(struct rib_entry *re, void *ptr) { struct prefix *p; - struct ctl_show_rib_request *req = ptr; + struct rde_dump_ctx *ctx = ptr; - LIST_FOREACH(p, &pt->prefix_h, prefix_l) - rde_dump_filter(p, req); + LIST_FOREACH(p, &re->prefix_h, rib_l) + rde_dump_filter(p, &ctx->req); } void @@ -1859,35 +1862,39 @@ rde_dump_as(struct ctl_show_rib_request *req) } void -rde_dump_prefix_upcall(struct pt_entry *pt, void *ptr) +rde_dump_prefix_upcall(struct rib_entry *re, void *ptr) { - struct ctl_show_rib_request *req = ptr; - struct prefix *p; - struct bgpd_addr addr; + struct rde_dump_ctx *ctx = ptr; + struct prefix *p; + struct pt_entry *pt; + struct bgpd_addr addr; + pt = re->prefix; pt_getaddr(pt, &addr); - if (addr.af != req->prefix.af) + if (addr.af != ctx->req.prefix.af) return; - if (req->prefixlen > pt->prefixlen) + if (ctx->req.prefixlen > pt->prefixlen) return; - if 
(!prefix_compare(&req->prefix, &addr, req->prefixlen)) - LIST_FOREACH(p, &pt->prefix_h, prefix_l) - rde_dump_filter(p, req); + if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen)) + LIST_FOREACH(p, &re->prefix_h, rib_l) + rde_dump_filter(p, &ctx->req); } void rde_dump_prefix(struct ctl_show_rib_request *req) { - struct pt_entry *pt; + struct rib_entry *re; + /* XXX other ribs ... */ if (req->prefixlen == 32) { - if ((pt = pt_lookup(&req->prefix)) != NULL) - rde_dump_upcall(pt, req); + if ((re = rib_lookup(&ribs[1], &req->prefix)) != NULL) + rde_dump_upcall(re, req); } else if (req->flags & F_LONGER) { - pt_dump(rde_dump_prefix_upcall, req, req->prefix.af); + rib_dump(&ribs[1], rde_dump_prefix_upcall, req, req->prefix.af); } else { - if ((pt = pt_get(&req->prefix, req->prefixlen)) != NULL) - rde_dump_upcall(pt, req); + if ((re = rib_get(&ribs[1], &req->prefix, req->prefixlen)) != + NULL) + rde_dump_upcall(re, req); } } @@ -1928,45 +1935,44 @@ rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid, memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request)); ctx->req.pid = pid; ctx->req.type = type; - ctx->ptc.count = RDE_RUNNER_ROUNDS; - ctx->af = ctx->req.af; - if (ctx->af == AF_UNSPEC) - ctx->af = AF_INET; + ctx->ribctx.ctx_count = RDE_RUNNER_ROUNDS; + ctx->ribctx.ctx_rib = &ribs[1]; /* XXX other ribs */ + switch (ctx->req.type) { + case IMSG_CTL_SHOW_NETWORK: + ctx->ribctx.ctx_upcall = network_dump_upcall; + break; + case IMSG_CTL_SHOW_RIB: + ctx->ribctx.ctx_upcall = rde_dump_upcall; + break; + default: + fatalx("rde_dump_ctx_new: unsupported imsg type"); + } + ctx->ribctx.ctx_done = rde_dump_done; + ctx->ribctx.ctx_arg = ctx; + ctx->ribctx.ctx_af = ctx->req.af; TAILQ_INSERT_TAIL(&rde_dump_h, ctx, entry); } void +rde_dump_done(void *arg) +{ + struct rde_dump_ctx *ctx = arg; + + imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, + -1, NULL, 0); + TAILQ_REMOVE(&rde_dump_h, ctx, entry); + free(ctx); +} + +void rde_dump_runner(void) { struct 
rde_dump_ctx *ctx, *next; for (ctx = TAILQ_FIRST(&rde_dump_h); ctx != NULL; ctx = next) { next = TAILQ_NEXT(ctx, entry); - if (ctx->ptc.done) { - imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, - -1, NULL, 0); - TAILQ_REMOVE(&rde_dump_h, ctx, entry); - free(ctx); - continue; - } - switch (ctx->req.type) { - case IMSG_CTL_SHOW_NETWORK: - pt_dump_r(network_dump_upcall, &ctx->req.pid, - ctx->af, &ctx->ptc); - break; - case IMSG_CTL_SHOW_RIB: - pt_dump_r(rde_dump_upcall, &ctx->req, ctx->af, - &ctx->ptc); - break; - default: - fatalx("rde_dump_runner: unsupported imsg type"); - } - if (ctx->ptc.done && ctx->req.af == AF_UNSPEC && - ctx->af == AF_INET) { - ctx->ptc.done = 0; - ctx->af = AF_INET6; - } + rib_dump_r(&ctx->ribctx); } } @@ -2111,9 +2117,10 @@ rde_send_nexthop(struct bgpd_addr *next, int valid) * soft reconfig specific functions */ void -rde_softreconfig_out(struct pt_entry *pt, void *ptr) +rde_softreconfig_out(struct rib_entry *re, void *ptr) { - struct prefix *p = pt->active; + struct prefix *p = re->active; + struct pt_entry *pt; struct rde_peer *peer; struct rde_aspath *oasp, *nasp; enum filter_actions oa, na; @@ -2122,6 +2129,7 @@ rde_softreconfig_out(struct pt_entry *pt, void *ptr) if (p == NULL) return; + pt = re->prefix; pt_getaddr(pt, &addr); LIST_FOREACH(peer, &peerlist, peer_l) { if (peer->conf.id == 0) @@ -2167,24 +2175,25 @@ done: } void -rde_softreconfig_in(struct pt_entry *pt, void *ptr) +rde_softreconfig_in(struct rib_entry *re, void *ptr) { struct prefix *p, *np; + struct pt_entry *pt; struct rde_peer *peer; struct rde_aspath *asp, *oasp, *nasp; enum filter_actions oa, na; struct bgpd_addr addr; + pt = re->prefix; pt_getaddr(pt, &addr); - for (p = LIST_FIRST(&pt->prefix_h); p != NULL; p = np) { - np = LIST_NEXT(p, prefix_l); - if (!(p->flags & F_ORIGINAL)) - continue; + for (p = LIST_FIRST(&re->prefix_h); p != NULL; p = np) { + np = LIST_NEXT(p, rib_l); /* store aspath as prefix may change till we're done */ asp = p->aspath; peer = 
asp->peer; + /* XXX how can this happen ??? */ if (peer->reconf_in == 0) continue; @@ -2201,19 +2210,19 @@ rde_softreconfig_in(struct pt_entry *pt, void *ptr) goto done; if (oa == ACTION_DENY && na == ACTION_ALLOW) { /* update Local-RIB */ - path_update(peer, nasp, &addr, pt->prefixlen, F_LOCAL); + path_update(&ribs[1], peer, nasp, &addr, pt->prefixlen); goto done; } if (oa == ACTION_ALLOW && na == ACTION_DENY) { /* remove from Local-RIB */ - prefix_remove(peer, &addr, pt->prefixlen, F_LOCAL); + prefix_remove(&ribs[1], peer, &addr, pt->prefixlen, F_LOCAL); goto done; } if (oa == ACTION_ALLOW && na == ACTION_ALLOW) { if (path_compare(nasp, oasp) == 0) goto done; /* send update */ - path_update(peer, nasp, &addr, pt->prefixlen, F_LOCAL); + path_update(&ribs[1], peer, nasp, &addr, pt->prefixlen); } done: @@ -2230,13 +2239,13 @@ done: u_char queue_buf[4096]; void -rde_up_dump_upcall(struct pt_entry *pt, void *ptr) +rde_up_dump_upcall(struct rib_entry *re, void *ptr) { struct rde_peer *peer = ptr; - if (pt->active == NULL) + if (re->active == NULL) return; - up_generate_updates(rules_l, peer, pt->active, NULL); + up_generate_updates(rules_l, peer, re->active, NULL); } void @@ -2634,14 +2643,17 @@ peer_dump(u_int32_t id, u_int16_t afi, u_int8_t safi) if (peer->conf.announce_type == ANNOUNCE_DEFAULT_ROUTE) up_generate_default(rules_l, peer, AF_INET); else - pt_dump(rde_up_dump_upcall, peer, AF_INET); + /* XXX totally wrong ... 
*/ + rib_dump(&ribs[1], rde_up_dump_upcall, peer, + AF_INET); } if (afi == AFI_ALL || afi == AFI_IPv6) if (safi == SAFI_ALL || safi == SAFI_UNICAST) { if (peer->conf.announce_type == ANNOUNCE_DEFAULT_ROUTE) up_generate_default(rules_l, peer, AF_INET6); else - pt_dump(rde_up_dump_upcall, peer, AF_INET6); + /* XXX again wrong rib */ + rib_dump(&ribs[1], rde_up_dump_upcall, peer, AF_INET6); } if (peer->capa_received.restart && peer->capa_announced.restart) @@ -2701,7 +2713,6 @@ void network_add(struct network_config *nc, int flagstatic) { struct rde_aspath *asp; - u_int32_t flags = F_PREFIX_ANNOUNCED; asp = path_get(); asp->aspath = aspath_get(NULL, 0); @@ -2710,11 +2721,11 @@ network_add(struct network_config *nc, int flagstatic) F_ATTR_LOCALPREF | F_PREFIX_ANNOUNCED; /* the nexthop is unset unless a default set overrides it */ if (!flagstatic) - flags |= F_ANN_DYNAMIC; + asp->flags |= F_ANN_DYNAMIC; rde_apply_set(asp, &nc->attrset, nc->prefix.af, peerself, peerself); - path_update(peerself, asp, &nc->prefix, nc->prefixlen, flags | F_ORIGINAL); - path_update(peerself, asp, &nc->prefix, nc->prefixlen, flags | F_LOCAL); + path_update(&ribs[0], peerself, asp, &nc->prefix, nc->prefixlen); + path_update(&ribs[1], peerself, asp, &nc->prefix, nc->prefixlen); path_put(asp); filterset_free(&nc->attrset); @@ -2728,12 +2739,14 @@ network_delete(struct network_config *nc, int flagstatic) if (!flagstatic) flags |= F_ANN_DYNAMIC; - prefix_remove(peerself, &nc->prefix, nc->prefixlen, flags | F_LOCAL); - prefix_remove(peerself, &nc->prefix, nc->prefixlen, flags | F_ORIGINAL); + prefix_remove(&ribs[0], peerself, &nc->prefix, nc->prefixlen, + flags | F_LOCAL); + prefix_remove(&ribs[1], peerself, &nc->prefix, nc->prefixlen, + flags | F_ORIGINAL); } void -network_dump_upcall(struct pt_entry *pt, void *ptr) +network_dump_upcall(struct rib_entry *re, void *ptr) { struct prefix *p; struct kroute k; @@ -2743,7 +2756,7 @@ network_dump_upcall(struct pt_entry *pt, void *ptr) memcpy(&pid, ptr, 
sizeof(pid)); - LIST_FOREACH(p, &pt->prefix_h, prefix_l) { + LIST_FOREACH(p, &re->prefix_h, rib_l) { if (!(p->aspath->flags & F_PREFIX_ANNOUNCED)) continue; if (p->prefix->af == AF_INET) { diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h index 78debe20f4b..7bf5e71f58c 100644 --- a/usr.sbin/bgpd/rde.h +++ b/usr.sbin/bgpd/rde.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.h,v 1.106 2009/04/23 19:23:27 claudio Exp $ */ +/* $OpenBSD: rde.h,v 1.107 2009/05/17 12:25:15 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> and @@ -42,6 +42,7 @@ LIST_HEAD(rde_peer_head, rde_peer); LIST_HEAD(aspath_head, rde_aspath); RB_HEAD(uptree_prefix, update_prefix); RB_HEAD(uptree_attr, update_attr); +RB_HEAD(rib_tree, rib_entry); TAILQ_HEAD(uplist_prefix, update_prefix); TAILQ_HEAD(uplist_attr, update_attr); @@ -66,7 +67,7 @@ struct rde_peer { u_int64_t prefix_sent_update; u_int64_t prefix_sent_withdraw; u_int32_t prefix_cnt; /* # of prefixes */ - u_int32_t adjrib_cnt; /* # of p. in Adj-RIB-In */ + u_int32_t rib_cnt; /* # of p. in Adj-RIB-In */ u_int32_t remote_bgpid; /* host byte order! */ u_int32_t up_pcnt; u_int32_t up_acnt; @@ -186,7 +187,7 @@ struct rde_aspath { u_int32_t weight; /* low prio lpref */ u_int32_t prefix_cnt; /* # of prefixes */ u_int32_t active_cnt; /* # of active prefixes */ - u_int32_t adjrib_cnt; /* # of p. in Adj-RIB-In */ + u_int32_t rib_cnt; /* # of p. 
in Adj-RIB-In */ u_int32_t flags; /* internally used */ u_int16_t rtlabelid; /* route label id */ u_int16_t pftableid; /* pf table id */ @@ -225,53 +226,64 @@ struct pt_entry { RB_ENTRY(pt_entry) pt_e; sa_family_t af; u_int8_t prefixlen; - struct prefix_head prefix_h; - struct prefix *active; /* for fast access */ + u_int16_t refcnt; }; struct pt_entry4 { RB_ENTRY(pt_entry) pt_e; sa_family_t af; u_int8_t prefixlen; - struct prefix_head prefix_h; - struct prefix *active; /* for fast access */ + u_int16_t refcnt; struct in_addr prefix4; - /* - * Route Flap Damping structures - * Currently I think they belong into the prefix but for the moment - * we just ignore the dampening at all. - */ }; struct pt_entry6 { RB_ENTRY(pt_entry) pt_e; sa_family_t af; u_int8_t prefixlen; - struct prefix_head prefix_h; - struct prefix *active; /* for fast access */ + u_int16_t refcnt; struct in6_addr prefix6; }; -struct pt_context { - union { - struct pt_entry p; - struct pt_entry4 p4; - struct pt_entry6 p6; - } pu; -#define ctx_p pu.p -#define ctx_p4 pu.p4 -#define ctx_p6 pu.p6 - /* only count and done should be accessed by callers */ - unsigned int count; - int done; +struct rib_context { + LIST_ENTRY(rib_context) entry; + struct pt_entry *ctx_p; + struct rib *ctx_rib; + void (*ctx_upcall)(struct rib_entry *, void *); + void (*ctx_done)(void *); + void *ctx_arg; + unsigned int ctx_count; + sa_family_t ctx_af; +}; + +struct rib_entry { + RB_ENTRY(rib_entry) rib_e; + struct prefix_head prefix_h; + struct prefix *active; /* for fast access */ + struct pt_entry *prefix; + struct rib *rib; +}; + +enum rib_state { + RIB_NONE, + RIB_ACTIVE, + RIB_DELETE +}; + +struct rib { + char name[PEER_DESCR_LEN]; + struct rib_tree rib; + LIST_HEAD(, rib_context) ctxts; + enum rib_state state; + u_int16_t id; }; struct prefix { - LIST_ENTRY(prefix) prefix_l, path_l; + LIST_ENTRY(prefix) rib_l, path_l; struct rde_aspath *aspath; struct pt_entry *prefix; + struct rib_entry *rib; /* NULL for Adj-RIB-In */ 
time_t lastchange; - u_int32_t flags; }; extern struct rde_memstats rdemem; @@ -331,10 +343,22 @@ int community_set(struct rde_aspath *, int, int); void community_delete(struct rde_aspath *, int, int); /* rde_rib.c */ +extern u_int16_t rib_size; +extern struct rib *ribs; + +void rib_init(void); +u_int16_t rib_new(char *); +void rib_free(struct rib *); +struct rib_entry *rib_get(struct rib *, struct bgpd_addr *, int); +struct rib_entry *rib_lookup(struct rib *, struct bgpd_addr *); +void rib_dump(struct rib *, void (*)(struct rib_entry *, void *), + void *, sa_family_t); +void rib_dump_r(struct rib_context *); + void path_init(u_int32_t); void path_shutdown(void); -void path_update(struct rde_peer *, struct rde_aspath *, - struct bgpd_addr *, int, u_int32_t); +void path_update(struct rib *, struct rde_peer *, + struct rde_aspath *, struct bgpd_addr *, int); int path_compare(struct rde_aspath *, struct rde_aspath *); struct rde_aspath *path_lookup(struct rde_aspath *, struct rde_peer *); void path_remove(struct rde_aspath *); @@ -347,15 +371,16 @@ void path_put(struct rde_aspath *); #define PREFIX_SIZE(x) (((x) + 7) / 8 + 1) int prefix_compare(const struct bgpd_addr *, const struct bgpd_addr *, int); -struct prefix *prefix_get(struct rde_peer *, struct bgpd_addr *, int, - u_int32_t); -struct pt_entry *prefix_add(struct rde_aspath *, struct bgpd_addr *, int, - u_int32_t); -struct pt_entry *prefix_move(struct rde_aspath *, struct prefix *, u_int32_t); -void prefix_remove(struct rde_peer *, struct bgpd_addr *, int, - u_int32_t); +struct prefix *prefix_get(struct rib *, struct rde_peer *, + struct bgpd_addr *, int, u_int32_t); +void prefix_add(struct rib *, struct rde_aspath *, + struct bgpd_addr *, int); +void prefix_move(struct rde_aspath *, struct prefix *); +void prefix_remove(struct rib *, struct rde_peer *, + struct bgpd_addr *, int, u_int32_t); int prefix_write(u_char *, int, struct bgpd_addr *, u_int8_t); -struct prefix *prefix_bypeer(struct pt_entry *, struct 
rde_peer *, u_int32_t); +struct prefix *prefix_bypeer(struct rib_entry *, struct rde_peer *, + u_int32_t); void prefix_updateall(struct rde_aspath *, enum nexthop_state, enum nexthop_state); void prefix_destroy(struct prefix *); @@ -373,7 +398,7 @@ struct nexthop *nexthop_get(struct bgpd_addr *); int nexthop_compare(struct nexthop *, struct nexthop *); /* rde_decide.c */ -void prefix_evaluate(struct prefix *, struct pt_entry *); +void prefix_evaluate(struct prefix *, struct rib_entry *); /* rde_update.c */ void up_init(struct rde_peer *); @@ -392,18 +417,28 @@ u_char *up_dump_mp_unreach(u_char *, u_int16_t *, struct rde_peer *); u_char *up_dump_mp_reach(u_char *, u_int16_t *, struct rde_peer *); /* rde_prefix.c */ -void pt_init(void); -void pt_shutdown(void); -int pt_empty(struct pt_entry *); -void pt_getaddr(struct pt_entry *, struct bgpd_addr *); +#define pt_empty(pt) ((pt)->refcnt == 0) +#define pt_ref(pt) do { \ + ++(pt)->refcnt; \ + if ((pt)->refcnt == 0) \ + fatalx("pt_ref: overflow"); \ +} while(0) +#define pt_unref(pt) do { \ + if ((pt)->refcnt == 0) \ + fatalx("pt_unref: underflow"); \ + --(pt)->refcnt; \ +} while(0) + +void pt_init(void); +void pt_shutdown(void); +void pt_getaddr(struct pt_entry *, struct bgpd_addr *); +struct pt_entry *pt_fill(struct bgpd_addr *, int); struct pt_entry *pt_get(struct bgpd_addr *, int); struct pt_entry *pt_add(struct bgpd_addr *, int); -void pt_remove(struct pt_entry *); +void pt_remove(struct pt_entry *); struct pt_entry *pt_lookup(struct bgpd_addr *); -void pt_dump(void (*)(struct pt_entry *, void *), void *, - sa_family_t); -void pt_dump_r(void (*)(struct pt_entry *, void *), void *, - sa_family_t, struct pt_context *); +int pt_prefix_cmp(const struct pt_entry *, const struct pt_entry *); + /* rde_filter.c */ enum filter_actions rde_filter(struct rde_aspath **, struct filter_head *, diff --git a/usr.sbin/bgpd/rde_decide.c b/usr.sbin/bgpd/rde_decide.c index c7f0d2dd93a..d7443dcb8ab 100644 --- a/usr.sbin/bgpd/rde_decide.c 
+++ b/usr.sbin/bgpd/rde_decide.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_decide.c,v 1.52 2009/04/23 19:23:27 claudio Exp $ */ +/* $OpenBSD: rde_decide.c,v 1.53 2009/05/17 12:25:15 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -115,12 +115,6 @@ prefix_cmp(struct prefix *p1, struct prefix *p2) if (p2 == NULL) return (1); - /* only prefixes in the Local-RIB are eligible */ - if (!(p1->flags & F_LOCAL)) - return (-1); - if (!(p2->flags & F_LOCAL)) - return (1); - asp1 = p1->aspath; asp2 = p2->aspath; @@ -202,8 +196,8 @@ prefix_cmp(struct prefix *p1, struct prefix *p2) sizeof(p1->aspath->peer->remote_addr))); /* 12. for announced prefixes prefer dynamic routes */ - if ((p1->flags & F_ANN_DYNAMIC) != (p2->flags & F_ANN_DYNAMIC)) { - if (p1->flags & F_ANN_DYNAMIC) + if ((asp1->flags & F_ANN_DYNAMIC) != (asp2->flags & F_ANN_DYNAMIC)) { + if (asp1->flags & F_ANN_DYNAMIC) return (1); else return (-1); @@ -219,59 +213,65 @@ prefix_cmp(struct prefix *p1, struct prefix *p2) * The to evaluate prefix must not be in the prefix list. */ void -prefix_evaluate(struct prefix *p, struct pt_entry *pte) +prefix_evaluate(struct prefix *p, struct rib_entry *re) { struct prefix *xp; + /* XXX we need to skip the adj-rib-in somehow */ + /* XXX hack for now, no idea why we end up here. 
*/ + if (!re) { + log_debug("*BOOM*"); + return; + } + if (rde_noevaluate()) { /* decision process is turned off */ if (p != NULL) - LIST_INSERT_HEAD(&pte->prefix_h, p, prefix_l); - if (pte->active != NULL) { - pte->active->aspath->active_cnt--; - pte->active = NULL; + LIST_INSERT_HEAD(&re->prefix_h, p, rib_l); + if (re->active != NULL) { + re->active->aspath->active_cnt--; + re->active = NULL; } return; } if (p != NULL) { - if (LIST_EMPTY(&pte->prefix_h)) - LIST_INSERT_HEAD(&pte->prefix_h, p, prefix_l); + if (LIST_EMPTY(&re->prefix_h)) + LIST_INSERT_HEAD(&re->prefix_h, p, rib_l); else { - LIST_FOREACH(xp, &pte->prefix_h, prefix_l) + LIST_FOREACH(xp, &re->prefix_h, rib_l) if (prefix_cmp(p, xp) > 0) { - LIST_INSERT_BEFORE(xp, p, prefix_l); + LIST_INSERT_BEFORE(xp, p, rib_l); break; - } else if (LIST_NEXT(xp, prefix_l) == NULL) { + } else if (LIST_NEXT(xp, rib_l) == NULL) { /* if xp last element ... */ - LIST_INSERT_AFTER(xp, p, prefix_l); + LIST_INSERT_AFTER(xp, p, rib_l); break; } } } - xp = LIST_FIRST(&pte->prefix_h); - if (xp == NULL || !(xp->flags & F_LOCAL) || - xp->aspath->flags & F_ATTR_LOOP || + xp = LIST_FIRST(&re->prefix_h); + if (xp == NULL || xp->aspath->flags & F_ATTR_LOOP || (xp->aspath->nexthop != NULL && xp->aspath->nexthop->state != NEXTHOP_REACH)) /* xp is ineligible */ xp = NULL; - if (pte->active != xp) { + if (re->active != xp) { /* need to generate an update */ - if (pte->active != NULL) - pte->active->aspath->active_cnt--; + if (re->active != NULL) + re->active->aspath->active_cnt--; /* - * Send update with remove for pte->active and add for xp + * Send update with remove for re->active and add for xp * but remember that xp may be NULL aka ineligible. * Additional decision may be made by the called functions. 
*/ - rde_generate_updates(xp, pte->active); - rde_send_kroute(xp, pte->active); + rde_generate_updates(xp, re->active); + rde_send_kroute(xp, re->active); - pte->active = xp; + re->active = xp; if (xp != NULL) xp->aspath->active_cnt++; } diff --git a/usr.sbin/bgpd/rde_prefix.c b/usr.sbin/bgpd/rde_prefix.c index f1a74bb9202..7eee8d3312e 100644 --- a/usr.sbin/bgpd/rde_prefix.c +++ b/usr.sbin/bgpd/rde_prefix.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_prefix.c,v 1.27 2009/04/26 14:40:03 claudio Exp $ */ +/* $OpenBSD: rde_prefix.c,v 1.28 2009/05/17 12:25:15 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -40,16 +40,12 @@ * pt_init: initialize prefix table. * pt_alloc?: allocate a AF specific pt_entry. Internal function. * pt_free: free a pt_entry. Internal function. - * pt_restart used to restart a tree walk at the spot it was aborted earlier. */ /* internal prototypes */ static struct pt_entry4 *pt_alloc4(void); static struct pt_entry6 *pt_alloc6(void); static void pt_free(struct pt_entry *); -static struct pt_entry *pt_restart(struct pt_context *); - -int pt_prefix_cmp(const struct pt_entry *, const struct pt_entry *); RB_HEAD(pt_tree, pt_entry); RB_PROTOTYPE(pt_tree, pt_entry, pt_e, pt_prefix_cmp); @@ -74,12 +70,6 @@ pt_shutdown(void) log_debug("pt_shutdown: IPv6 tree is not empty."); } -int -pt_empty(struct pt_entry *pte) -{ - return LIST_EMPTY(&pte->prefix_h); -} - void pt_getaddr(struct pt_entry *pte, struct bgpd_addr *addr) { @@ -101,14 +91,15 @@ pt_getaddr(struct pt_entry *pte, struct bgpd_addr *addr) } struct pt_entry * -pt_get(struct bgpd_addr *prefix, int prefixlen) +pt_fill(struct bgpd_addr *prefix, int prefixlen) { - struct pt_entry4 pte4; - struct pt_entry6 pte6; + static struct pt_entry4 pte4; + static struct pt_entry6 pte6; in_addr_t addr_hbo; switch (prefix->af) { case AF_INET: + bzero(&pte4, sizeof(pte4)); if (prefixlen > 32) fatalx("pt_get: bad IPv4 prefixlen"); pte4.af = AF_INET; @@ -116,14 +107,32 @@ pt_get(struct 
bgpd_addr *prefix, int prefixlen) pte4.prefix4.s_addr = htonl(addr_hbo & prefixlen2mask(prefixlen)); pte4.prefixlen = prefixlen; - return RB_FIND(pt_tree, &pttable4, (struct pt_entry *)&pte4); + return ((struct pt_entry *)&pte4); case AF_INET6: + bzero(&pte6, sizeof(pte6)); if (prefixlen > 128) fatalx("pt_get: bad IPv6 prefixlen"); pte6.af = AF_INET6; pte6.prefixlen = prefixlen; inet6applymask(&pte6.prefix6, &prefix->v6, prefixlen); - return RB_FIND(pt_tree, &pttable6, (struct pt_entry *)&pte6); + return ((struct pt_entry *)&pte6); + default: + log_warnx("pt_get: unknown af"); + return (NULL); + } +} + +struct pt_entry * +pt_get(struct bgpd_addr *prefix, int prefixlen) +{ + struct pt_entry *pte; + + pte = pt_fill(prefix, prefixlen); + switch (prefix->af) { + case AF_INET: + return RB_FIND(pt_tree, &pttable4, pte); + case AF_INET6: + return RB_FIND(pt_tree, &pttable6, pte); default: log_warnx("pt_get: unknown af"); } @@ -165,7 +174,6 @@ pt_add(struct bgpd_addr *prefix, int prefixlen) default: fatalx("pt_add: unknown af"); } - LIST_INIT(&p->prefix_h); if (RB_INSERT(pt_tree, tree, p) != NULL) { log_warnx("pt_add: insert failed"); @@ -179,7 +187,7 @@ void pt_remove(struct pt_entry *pte) { if (!pt_empty(pte)) - fatalx("pt_remove: entry not empty"); + fatalx("pt_remove: entry still holds references"); switch (pte->af) { case AF_INET: @@ -198,22 +206,22 @@ pt_remove(struct pt_entry *pte) } struct pt_entry * -pt_lookup(struct bgpd_addr *prefix) +pt_lookup(struct bgpd_addr *addr) { struct pt_entry *p; int i; - switch (prefix->af) { + switch (addr->af) { case AF_INET: for (i = 32; i >= 0; i--) { - p = pt_get(prefix, i); + p = pt_get(addr, i); if (p != NULL) return (p); } break; case AF_INET6: for (i = 128; i >= 0; i--) { - p = pt_get(prefix, i); + p = pt_get(addr, i); if (p != NULL) return (p); } @@ -224,58 +232,6 @@ pt_lookup(struct bgpd_addr *prefix) return (NULL); } -void -pt_dump(void (*upcall)(struct pt_entry *, void *), void *arg, sa_family_t af) -{ - if (af == AF_INET 
|| af == AF_UNSPEC) - pt_dump_r(upcall, arg, AF_INET, NULL); - if (af == AF_INET6 || af == AF_UNSPEC) - pt_dump_r(upcall, arg, AF_INET6, NULL); -} - -void -pt_dump_r(void (*upcall)(struct pt_entry *, void *), void *arg, - sa_family_t af, struct pt_context *ctx) -{ - struct pt_entry *p; - unsigned int i; - - if (ctx == NULL || ctx->ctx_p.af != af) { - switch (af) { - case AF_INET: - p = RB_MIN(pt_tree, &pttable4); - break; - case AF_INET6: - p = RB_MIN(pt_tree, &pttable6); - break; - default: - return; - } - } else - p = pt_restart(ctx); - - for (i = 0; p != NULL; p = RB_NEXT(pt_tree, unused, p)) { - if (ctx && i++ >= ctx->count) { - /* store next start point */ - switch (p->af) { - case AF_INET: - ctx->ctx_p4 = *(struct pt_entry4 *)p; - break; - case AF_INET6: - ctx->ctx_p6 = *(struct pt_entry6 *)p; - break; - default: - fatalx("pt_dump_r: unknown af"); - } - return; - } - upcall(p, arg); - } - - if (ctx) - ctx->done = 1; -} - int pt_prefix_cmp(const struct pt_entry *a, const struct pt_entry *b) { @@ -283,8 +239,10 @@ pt_prefix_cmp(const struct pt_entry *a, const struct pt_entry *b) const struct pt_entry6 *a6, *b6; int i; - if (a->af != b->af) - fatalx("king bula sez: comparing pears with apples"); + if (a->af > b->af) + return (1); + if (a->af < b->af) + return (-1); switch (a->af) { case AF_INET: @@ -359,56 +317,3 @@ pt_free(struct pt_entry *pte) } free(pte); } - -static struct pt_entry * -pt_restart(struct pt_context *ctx) -{ - struct pt_entry *tmp, *prev = NULL; - int comp; - - /* first select correct tree */ - switch (ctx->ctx_p.af) { - case AF_INET: - tmp = RB_ROOT(&pttable4); - break; - case AF_INET6: - tmp = RB_ROOT(&pttable6); - break; - default: - fatalx("pt_restart: unknown af"); - } - - /* then try to find the element */ - while (tmp) { - prev = tmp; - comp = pt_prefix_cmp(&ctx->ctx_p, tmp); - if (comp < 0) - tmp = RB_LEFT(tmp, pt_e); - else if (comp > 0) - tmp = RB_RIGHT(tmp, pt_e); - else - return (tmp); - } - - /* no match, empty tree */ - if (prev 
== NULL) - return (NULL); - - /* - * no perfect match - * if last element was bigger use that as new start point - */ - if (comp < 0) - return (prev); - - /* backtrack until parent is bigger */ - do { - prev = RB_PARENT(prev, pt_e); - if (prev == NULL) - /* all elements in the tree are smaler */ - return (NULL); - comp = pt_prefix_cmp(&ctx->ctx_p, prev); - } while (comp > 0); - - return (prev); -} diff --git a/usr.sbin/bgpd/rde_rib.c b/usr.sbin/bgpd/rde_rib.c index 299cea252a1..2f651d6f1e3 100644 --- a/usr.sbin/bgpd/rde_rib.c +++ b/usr.sbin/bgpd/rde_rib.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_rib.c,v 1.98 2009/04/23 19:23:27 claudio Exp $ */ +/* $OpenBSD: rde_rib.c,v 1.99 2009/05/17 12:25:15 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -33,14 +33,260 @@ * Therefore one thing needs to be absolutely avoided, long table walks. * This is achieved by heavily linking the different parts together. */ +u_int16_t rib_size; +struct rib *ribs; + +struct rib_entry *rib_add(struct rib *, struct bgpd_addr *, int); +int rib_compare(const struct rib_entry *, const struct rib_entry *); +void rib_remove(struct rib_entry *); +int rib_empty(struct rib_entry *); +struct rib_entry *rib_restart(struct rib_context *); + +RB_PROTOTYPE(rib_tree, rib_entry, rib_e, rib_compare); +RB_GENERATE(rib_tree, rib_entry, rib_e, rib_compare); + + +/* RIB specific functions */ +void +rib_init(void) +{ + if ((ribs = calloc(1, sizeof(struct rib))) == NULL) + fatal("rib_init"); + rib_new("DEFAULT"); +} + +u_int16_t +rib_new(char *name) +{ + struct rib *xribs; + size_t newsize; + u_int16_t id; + + id = rib_name2id(name); + + if (id >= rib_size) { + newsize = sizeof(struct rib) * (id + 1); + if ((xribs = realloc(ribs, newsize)) == NULL) { + /* XXX this is not clever */ + fatal("rib_add"); + } + ribs = xribs; + rib_size = id + 1; + } + + bzero(&ribs[id], sizeof(struct rib)); + strlcpy(ribs[id].name, name, sizeof(ribs[id].name)); + RB_INIT(&ribs[id].rib); + ribs[id].state = 
RIB_ACTIVE; + ribs[id].id = id; + + return (id); +} + +void +rib_free(struct rib *rib) +{ + /* XXX */ + //bzero(rib, sizeof(struct rib)); +} + +int +rib_compare(const struct rib_entry *a, const struct rib_entry *b) +{ + return (pt_prefix_cmp(a->prefix, b->prefix)); +} + +struct rib_entry * +rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen) +{ + struct rib_entry xre; + struct pt_entry *pte; + + pte = pt_fill(prefix, prefixlen); + bzero(&xre, sizeof(xre)); + xre.prefix = pte; + + return (RB_FIND(rib_tree, &rib->rib, &xre)); +} + +struct rib_entry * +rib_lookup(struct rib *rib, struct bgpd_addr *addr) +{ + struct rib_entry *re; + int i; + + switch (addr->af) { + case AF_INET: + for (i = 32; i >= 0; i--) { + re = rib_get(rib, addr, i); + if (re != NULL) + return (re); + } + break; + case AF_INET6: + for (i = 128; i >= 0; i--) { + re = rib_get(rib, addr, i); + if (re != NULL) + return (re); + } + break; + default: + fatalx("rib_lookup: unknown af"); + } + return (NULL); +} + + +struct rib_entry * +rib_add(struct rib *rib, struct bgpd_addr *prefix, int prefixlen) +{ + struct pt_entry *pte; + struct rib_entry *re; + + pte = pt_get(prefix, prefixlen); + if (pte == NULL) + pte = pt_add(prefix, prefixlen); + + if ((re = calloc(1, sizeof(*re))) == NULL) + fatal("rib_add"); + + LIST_INIT(&re->prefix_h); + re->prefix = pte; + re->rib = rib; + + if (RB_INSERT(rib_tree, &rib->rib, re) != NULL) { + log_warnx("rib_add: insert failed"); + return (NULL); + } + + pt_ref(pte); + + rdemem.rib_cnt++; + + return (re); +} + +void +rib_remove(struct rib_entry *re) +{ + if (!rib_empty(re)) + fatalx("rib_remove: entry not empty"); + + pt_unref(re->prefix); + if (pt_empty(re->prefix)) + pt_remove(re->prefix); + + if (RB_REMOVE(rib_tree, &re->rib->rib, re) == NULL) + log_warnx("rib_remove: remove failed."); + + free(re); + rdemem.rib_cnt--; +} + +int +rib_empty(struct rib_entry *re) +{ + return LIST_EMPTY(&re->prefix_h); +} + +void +rib_dump(struct rib *rib, void 
(*upcall)(struct rib_entry *, void *), + void *arg, sa_family_t af) +{ + struct rib_context ctx; + + bzero(&ctx, sizeof(ctx)); + ctx.ctx_rib = rib; + ctx.ctx_upcall = upcall; + ctx.ctx_arg = arg; + ctx.ctx_af = af; + rib_dump_r(&ctx); +} + +void +rib_dump_r(struct rib_context *ctx) +{ + struct rib_entry *re; + unsigned int i; + + if (ctx->ctx_p == NULL) { + re = RB_MIN(rib_tree, &ctx->ctx_rib->rib); + LIST_INSERT_HEAD(&ctx->ctx_rib->ctxts, ctx, entry); + } else + re = rib_restart(ctx); + + for (i = 0; re != NULL; re = RB_NEXT(rib_tree, unused, re)) { + if (ctx->ctx_count && i++ >= ctx->ctx_count) { + /* store next start point */ + ctx->ctx_p = re->prefix; + pt_ref(ctx->ctx_p); + return; + } + ctx->ctx_upcall(re, ctx->ctx_arg); + } + + LIST_REMOVE(ctx, entry); + if (ctx->ctx_done) + ctx->ctx_done(ctx->ctx_arg); +} + +struct rib_entry * +rib_restart(struct rib_context *ctx) +{ + struct rib_entry *tmp, *prev = NULL; + int comp; + + /* first unref the pt_entry and check if the table is still around */ + pt_unref(ctx->ctx_p); + if (pt_empty(ctx->ctx_p)) + pt_remove(ctx->ctx_p); + + if (ctx->ctx_rib == NULL) + return NULL; + + /* then try to find the element */ + tmp = RB_ROOT(&ctx->ctx_rib->rib); + while (tmp) { + prev = tmp; + comp = pt_prefix_cmp(ctx->ctx_p, tmp->prefix); + if (comp < 0) + tmp = RB_LEFT(tmp, rib_e); + else if (comp > 0) + tmp = RB_RIGHT(tmp, rib_e); + else + return (tmp); + } + + /* no match, empty tree */ + if (prev == NULL) + return (NULL); + + /* + * no perfect match + * if last element was bigger use that as new start point + */ + if (comp < 0) + return (prev); + + /* backtrack until parent is bigger */ + do { + prev = RB_PARENT(prev, rib_e); + if (prev == NULL) + /* all elements in the tree are smaler */ + return (NULL); + comp = pt_prefix_cmp(ctx->ctx_p, prev->prefix); + } while (comp > 0); + + return (prev); +} /* used to bump correct prefix counters */ -#define PREFIX_COUNT(x, f, op) \ +#define PREFIX_COUNT(x, id, op) \ do { \ - if (f & 
F_LOCAL) \ + if (id == 1) \ (x)->prefix_cnt += (op); \ - if (f & F_ORIGINAL) \ - (x)->adjrib_cnt += (op); \ + else \ + (x)->rib_cnt += (op); \ } while (0) /* path specific functions */ @@ -84,61 +330,31 @@ path_shutdown(void) } void -path_update(struct rde_peer *peer, struct rde_aspath *nasp, - struct bgpd_addr *prefix, int prefixlen, u_int32_t flags) +path_update(struct rib *rib, struct rde_peer *peer, struct rde_aspath *nasp, + struct bgpd_addr *prefix, int prefixlen) { struct rde_aspath *asp; - struct prefix *p, *oldp = NULL; + struct prefix *p; +#if 0 + /* XXX NEEDS SOMETHING BETTER HERE */ if (flags & F_LOCAL) { rde_send_pftable(nasp->pftableid, prefix, prefixlen, 0); rde_send_pftable_commit(); } +#endif /* - * First try to find a prefix in the specified RIB or in the - * Adj-RIB-In. This works because Local-RIB has precedence over the - * Adj-RIB-In. In the end this saves use some additional lookups. + * First try to find a prefix in the specified RIB. */ - if ((p = prefix_get(peer, prefix, prefixlen, flags | F_ORIGINAL)) != - NULL) { - do { - if (path_compare(nasp, p->aspath) == 0) { - if ((p->flags & flags & F_RIB_MASK) == 0) { - if (oldp != NULL) { - asp = oldp->aspath; - prefix_destroy(oldp); - if (path_empty(asp)) - path_destroy(asp); - } - p->flags |= flags & F_RIB_MASK; - PREFIX_COUNT(p->aspath, flags, 1); - PREFIX_COUNT(peer, flags, 1); - - /* re-evaluate prefix */ - LIST_REMOVE(p, prefix_l); - prefix_evaluate(p, p->prefix); - } - /* update last change */ - p->lastchange = time(NULL); - return; - } - /* - * If the prefix is not already part of the Adj-RIB-In - * do a lookup in there. But keep the original prefix - * around so that it can be removed later. 
- */ - if (p->flags & F_ORIGINAL) - break; - oldp = p; - p = prefix_get(peer, prefix, prefixlen, F_ORIGINAL); - } while (p != NULL); + if ((p = prefix_get(rib, peer, prefix, prefixlen, 0)) != NULL) { + if (path_compare(nasp, p->aspath) == 0) { + /* no change, update last change */ + p->lastchange = time(NULL); + return; + } } - /* Do not try to move a prefix that is in the wrong RIB. */ - if (p == NULL || (p->flags & flags & F_RIB_MASK) == 0) - p = oldp; - /* * Either the prefix does not exist or the path changed. * In both cases lookup the new aspath to make sure it is not @@ -152,9 +368,9 @@ path_update(struct rde_peer *peer, struct rde_aspath *nasp, /* If the prefix was found move it else add it to the aspath. */ if (p != NULL) - prefix_move(asp, p, flags); + prefix_move(asp, p); else - prefix_add(asp, prefix, prefixlen, flags); + prefix_add(rib, asp, prefix, prefixlen); } int @@ -221,14 +437,18 @@ void path_remove(struct rde_aspath *asp) { struct prefix *p; - struct bgpd_addr addr; while ((p = LIST_FIRST(&asp->prefix_h)) != NULL) { +#if 0 /* Commit is done in peer_down() */ + /* XXX AGAIN NEEDS A BETTER SOLUTION */ + struct bgpd_addr addr; + pt_getaddr(p->prefix, &addr); if (p->flags & F_LOCAL) rde_send_pftable(p->aspath->pftableid, &addr, p->prefix->prefixlen, 1); +#endif prefix_destroy(p); } @@ -241,7 +461,7 @@ path_destroy(struct rde_aspath *asp) { /* path_destroy can only unlink and free empty rde_aspath */ if (asp->prefix_cnt != 0 || asp->active_cnt != 0 || - asp->adjrib_cnt != 0) + asp->rib_cnt != 0) log_warnx("path_destroy: prefix count out of sync"); nexthop_unlink(asp); @@ -354,8 +574,8 @@ path_put(struct rde_aspath *asp) static struct prefix *prefix_alloc(void); static void prefix_free(struct prefix *); -static void prefix_link(struct prefix *, struct pt_entry *, - struct rde_aspath *, u_int32_t); +static void prefix_link(struct prefix *, struct rib_entry *, + struct rde_aspath *); static void prefix_unlink(struct prefix *); int @@ -404,51 +624,51 @@ 
prefix_compare(const struct bgpd_addr *a, const struct bgpd_addr *b, * search for specified prefix of a peer. Returns NULL if not found. */ struct prefix * -prefix_get(struct rde_peer *peer, struct bgpd_addr *prefix, int prefixlen, - u_int32_t flags) +prefix_get(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix, + int prefixlen, u_int32_t flags) { - struct pt_entry *pte; + struct rib_entry *re; - pte = pt_get(prefix, prefixlen); - if (pte == NULL) + re = rib_get(rib, prefix, prefixlen); + if (re == NULL) return (NULL); - return (prefix_bypeer(pte, peer, flags)); + return (prefix_bypeer(re, peer, flags)); } /* * Adds or updates a prefix. */ -struct pt_entry * -prefix_add(struct rde_aspath *asp, struct bgpd_addr *prefix, int prefixlen, - u_int32_t flags) +void +prefix_add(struct rib *rib, struct rde_aspath *asp, struct bgpd_addr *prefix, + int prefixlen) { - struct prefix *p; - struct pt_entry *pte; + struct prefix *p; + struct rib_entry *re; - pte = pt_get(prefix, prefixlen); - if (pte == NULL) - pte = pt_add(prefix, prefixlen); + re = rib_get(rib, prefix, prefixlen); + if (re == NULL) + re = rib_add(rib, prefix, prefixlen); - p = prefix_bypeer(pte, asp->peer, flags); + p = prefix_bypeer(re, asp->peer, asp->flags); if (p == NULL) { p = prefix_alloc(); - prefix_link(p, pte, asp, flags); + prefix_link(p, re, asp); } else { - if (p->aspath != asp) + if (p->aspath != asp) { /* prefix belongs to a different aspath so move */ - return (prefix_move(asp, p, flags)); + prefix_move(asp, p); + return; + } p->lastchange = time(NULL); } - - return (pte); } /* * Move the prefix to the specified as path, removes the old asp if needed. 
*/ -struct pt_entry * -prefix_move(struct rde_aspath *asp, struct prefix *p, u_int32_t flags) +void +prefix_move(struct rde_aspath *asp, struct prefix *p) { struct prefix *np; struct rde_aspath *oasp; @@ -461,45 +681,18 @@ prefix_move(struct rde_aspath *asp, struct prefix *p, u_int32_t flags) np->aspath = asp; /* peer and prefix pointers are still equal */ np->prefix = p->prefix; + np->rib = p->rib; np->lastchange = time(NULL); - np->flags = flags; /* add to new as path */ LIST_INSERT_HEAD(&asp->prefix_h, np, path_l); - PREFIX_COUNT(asp, flags, 1); + PREFIX_COUNT(asp, p->rib->rib->id, 1); /* * no need to update the peer prefix count because we are only moving * the prefix without changing the peer. */ /* - * fiddle around with the flags. If the p->flags is not equal - * to flags the old prefix p may not be removed but instead p->flags - * needs to be adjusted. - */ - if ((p->flags & F_RIB_MASK) != (flags & F_RIB_MASK)) { - if ((p->flags & flags & F_RIB_MASK) == 0) - fatalx("prefix_move: " - "prefix is not part of desired RIB"); - - p->flags &= ~(flags & F_RIB_MASK); - PREFIX_COUNT(p->aspath, flags, -1); - /* as before peer count needs no update because of move */ - - /* redo the route decision for p */ - LIST_REMOVE(p, prefix_l); - /* If the prefix is the active one remove it first. */ - if (p == p->prefix->active) - prefix_evaluate(NULL, p->prefix); - prefix_evaluate(p, p->prefix); - - /* and now for np */ - prefix_evaluate(np, np->prefix); - - return (np->prefix); - } - - /* * First kick the old prefix node out of the prefix list, * afterwards run the route decision for new prefix node. * Because of this only one update is generated if the prefix @@ -507,25 +700,24 @@ prefix_move(struct rde_aspath *asp, struct prefix *p, u_int32_t flags) * This is save because we create a new prefix and so the change * is noticed by prefix_evaluate(). 
*/ - LIST_REMOVE(p, prefix_l); - prefix_evaluate(np, np->prefix); + LIST_REMOVE(p, rib_l); + prefix_evaluate(np, np->rib); /* remove old prefix node */ oasp = p->aspath; LIST_REMOVE(p, path_l); - PREFIX_COUNT(oasp, flags, -1); + PREFIX_COUNT(oasp, p->rib->rib->id, -1); /* as before peer count needs no update because of move */ /* destroy all references to other objects and free the old prefix */ p->aspath = NULL; p->prefix = NULL; + p->rib = NULL; prefix_free(p); /* destroy old path if empty */ if (path_empty(oasp)) path_destroy(oasp); - - return (np->prefix); } /* @@ -533,50 +725,37 @@ prefix_move(struct rde_aspath *asp, struct prefix *p, u_int32_t flags) * pt_entry -- become empty remove them too. */ void -prefix_remove(struct rde_peer *peer, struct bgpd_addr *prefix, int prefixlen, - u_int32_t flags) +prefix_remove(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix, + int prefixlen, u_int32_t flags) { struct prefix *p; - struct pt_entry *pte; + struct rib_entry *re; struct rde_aspath *asp; - pte = pt_get(prefix, prefixlen); - if (pte == NULL) /* Got a dummy withdrawn request */ + re = rib_get(rib, prefix, prefixlen); + if (re == NULL) /* Got a dummy withdrawn request */ return; - p = prefix_bypeer(pte, peer, flags); + p = prefix_bypeer(re, peer, flags); if (p == NULL) /* Got a dummy withdrawn request. */ return; asp = p->aspath; +#if 0 + /* XXX AGAIN THIS NEEDS A BETTER SOLUTION */ if (p->flags & F_LOCAL) { /* only prefixes in the local RIB were pushed into pf */ rde_send_pftable(asp->pftableid, prefix, prefixlen, 1); rde_send_pftable_commit(); } - - /* if prefix belongs to more than one RIB just remove one instance */ - if ((p->flags & F_RIB_MASK) != (flags & F_RIB_MASK)) { - p->flags &= ~(flags & F_RIB_MASK); - - PREFIX_COUNT(p->aspath, flags, -1); - PREFIX_COUNT(peer, flags, -1); - - /* redo the route decision for p */ - LIST_REMOVE(p, prefix_l); - /* If the prefix is the active one remove it first. 
*/ - if (p == p->prefix->active) - prefix_evaluate(NULL, p->prefix); - prefix_evaluate(p, p->prefix); - return; - } +#endif prefix_unlink(p); prefix_free(p); - if (pt_empty(pte)) - pt_remove(pte); + if (rib_empty(re)) + rib_remove(re); if (path_empty(asp)) path_destroy(asp); } @@ -604,22 +783,23 @@ prefix_write(u_char *buf, int len, struct bgpd_addr *prefix, u_int8_t plen) * belonging to the peer peer. Returns NULL if no match found. */ struct prefix * -prefix_bypeer(struct pt_entry *pte, struct rde_peer *peer, u_int32_t flags) +prefix_bypeer(struct rib_entry *re, struct rde_peer *peer, u_int32_t flags) { struct prefix *p; - LIST_FOREACH(p, &pte->prefix_h, prefix_l) { - if (p->aspath->peer != peer || - (p->flags & flags & F_RIB_MASK) == 0) + LIST_FOREACH(p, &re->prefix_h, rib_l) { + if (p->aspath->peer != peer) continue; - if (flags & F_PREFIX_ANNOUNCED && - (flags & F_ANN_DYNAMIC) != (p->flags & F_ANN_DYNAMIC)) + if (p->aspath->flags & flags && + (flags & F_ANN_DYNAMIC) != + (p->aspath->flags & F_ANN_DYNAMIC)) continue; return (p); } return (NULL); } +/* XXX this completely wrong somewhat */ void prefix_updateall(struct rde_aspath *asp, enum nexthop_state state, enum nexthop_state oldstate) @@ -632,11 +812,10 @@ prefix_updateall(struct rde_aspath *asp, enum nexthop_state state, LIST_FOREACH(p, &asp->prefix_h, path_l) { /* + * XXX THIS IS MISSING AT THE MOMENT * skip non local-RIB nodes, only local-RIB prefixes are * eligible. Both F_LOCAL and F_ORIGINAL may be set. */ - if (!(p->flags & F_LOCAL)) - continue; if (oldstate == state && state == NEXTHOP_REACH) { /* @@ -645,13 +824,13 @@ prefix_updateall(struct rde_aspath *asp, enum nexthop_state state, * or other internal infos. This will not change * the routing decision so shortcut here. 
*/ - if (p == p->prefix->active) + if (p == p->rib->active) rde_send_kroute(p, NULL); continue; } /* redo the route decision */ - LIST_REMOVE(p, prefix_l); + LIST_REMOVE(p, rib_l); /* * If the prefix is the active one remove it first, * this has to be done because we can not detect when @@ -660,9 +839,9 @@ prefix_updateall(struct rde_aspath *asp, enum nexthop_state state, * prefix_evaluate() will generate no update because * the nexthop is unreachable or ineligible. */ - if (p == p->prefix->active) - prefix_evaluate(NULL, p->prefix); - prefix_evaluate(p, p->prefix); + if (p == p->rib->active) + prefix_evaluate(NULL, p->rib); + prefix_evaluate(p, p->rib); } } @@ -670,14 +849,14 @@ prefix_updateall(struct rde_aspath *asp, enum nexthop_state state, void prefix_destroy(struct prefix *p) { - struct pt_entry *pte; + struct rib_entry *re; - pte = p->prefix; + re = p->rib; prefix_unlink(p); prefix_free(p); - if (pt_empty(pte)) - pt_remove(pte); + if (rib_empty(re)) + rib_remove(re); } /* @@ -692,10 +871,11 @@ prefix_network_clean(struct rde_peer *peer, time_t reloadtime, u_int32_t flags) for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = xasp) { xasp = LIST_NEXT(asp, peer_l); + if ((asp->flags & F_ANN_DYNAMIC) == flags) + continue; for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = xp) { xp = LIST_NEXT(p, path_l); - if (reloadtime > p->lastchange && - (p->flags & F_ANN_DYNAMIC) == flags) { + if (reloadtime > p->lastchange) { pte = p->prefix; prefix_unlink(p); prefix_free(p); @@ -713,20 +893,20 @@ prefix_network_clean(struct rde_peer *peer, time_t reloadtime, u_int32_t flags) * Link a prefix into the different parent objects. 
*/ static void -prefix_link(struct prefix *pref, struct pt_entry *pte, struct rde_aspath *asp, - u_int32_t flags) +prefix_link(struct prefix *pref, struct rib_entry *re, struct rde_aspath *asp) { LIST_INSERT_HEAD(&asp->prefix_h, pref, path_l); - PREFIX_COUNT(asp, flags, 1); - PREFIX_COUNT(asp->peer, flags, 1); + PREFIX_COUNT(asp, re->rib->id, 1); + PREFIX_COUNT(asp->peer, re->rib->id, 1); pref->aspath = asp; - pref->prefix = pte; + pref->rib = re; + pref->prefix = re->prefix; + pt_ref(pref->prefix); pref->lastchange = time(NULL); - pref->flags = flags; /* make route decision */ - prefix_evaluate(pref, pte); + prefix_evaluate(pref, re); } /* @@ -735,17 +915,24 @@ prefix_link(struct prefix *pref, struct pt_entry *pte, struct rde_aspath *asp, static void prefix_unlink(struct prefix *pref) { - /* make route decision */ - LIST_REMOVE(pref, prefix_l); - prefix_evaluate(NULL, pref->prefix); + if (pref->rib) { + /* make route decision */ + LIST_REMOVE(pref, rib_l); + prefix_evaluate(NULL, pref->rib); + } LIST_REMOVE(pref, path_l); - PREFIX_COUNT(pref->aspath, pref->flags, -1); - PREFIX_COUNT(pref->aspath->peer, pref->flags, -1); + PREFIX_COUNT(pref->aspath, pref->rib->rib->id, -1); + PREFIX_COUNT(pref->aspath->peer, pref->rib->rib->id, -1); + + pt_unref(pref->prefix); + if (pt_empty(pref->prefix)) + pt_remove(pref->prefix); /* destroy all references to other objects */ pref->aspath = NULL; pref->prefix = NULL; + pref->rib = NULL; /* * It's the caller's duty to remove empty aspath respectively pt_entry |