summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorClaudio Jeker <claudio@cvs.openbsd.org>2019-07-17 10:13:27 +0000
committerClaudio Jeker <claudio@cvs.openbsd.org>2019-07-17 10:13:27 +0000
commit51e8db3a8349db1ac1ff9fd3f4cc1897c2a9849d (patch)
treeb9b081f8a1958b0003c13969af319d4fd258fdf2
parentef536df52e0e823c0e3d7598a9f2edcf752e43e1 (diff)
Change the Adj-RIB-Out to a per peer set of RB trees. The way RIB data
structures are linked does not scale for the Adj-RIB-Out and so inserts and updates into the Adj-RIB-Out did not scale because of some linear list traversals in hot paths. A synthetic test with 4000 peers announcing one prefix each showed that the initial convergence time dropped from around 1 hour to around 6min. Note: because the Adj-RIB-Out is now per peer the order in which prefixes are dumped in 'bgpctl show rib out' changed. Tested and OK job@, benno@, phessler@
-rw-r--r--usr.sbin/bgpd/mrt.c13
-rw-r--r--usr.sbin/bgpd/parse.y4
-rw-r--r--usr.sbin/bgpd/rde.c270
-rw-r--r--usr.sbin/bgpd/rde.h50
-rw-r--r--usr.sbin/bgpd/rde_decide.c17
-rw-r--r--usr.sbin/bgpd/rde_rib.c419
-rw-r--r--usr.sbin/bgpd/rde_update.c31
7 files changed, 604 insertions, 200 deletions
diff --git a/usr.sbin/bgpd/mrt.c b/usr.sbin/bgpd/mrt.c
index 9c6dd170b92..cf302470222 100644
--- a/usr.sbin/bgpd/mrt.c
+++ b/usr.sbin/bgpd/mrt.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: mrt.c,v 1.97 2019/06/25 21:33:55 benno Exp $ */
+/* $OpenBSD: mrt.c,v 1.98 2019/07/17 10:13:26 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org>
@@ -512,15 +512,11 @@ mrt_dump_entry_v2(struct mrt *mrt, struct rib_entry *re, u_int32_t snum)
goto fail;
}
nump = 0;
- LIST_FOREACH(p, &re->prefix_h, rib_l) {
+ LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
struct nexthop *nexthop;
struct bgpd_addr *nh;
struct ibuf *tbuf;
- /* skip pending withdraw in Adj-RIB-Out */
- if (prefix_aspath(p) == NULL)
- continue;
-
nexthop = prefix_nexthop(p);
if (nexthop == NULL) {
bzero(&addr, sizeof(struct bgpd_addr));
@@ -683,10 +679,7 @@ mrt_dump_upcall(struct rib_entry *re, void *ptr)
* dumps the table so we do the same. If only the active route should
* be dumped p should be set to p = pt->active.
*/
- LIST_FOREACH(p, &re->prefix_h, rib_l) {
- /* skip pending withdraw in Adj-RIB-Out */
- if (prefix_aspath(p) == NULL)
- continue;
+ LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
if (mrtbuf->type == MRT_TABLE_DUMP)
mrt_dump_entry(mrtbuf, p, mrtbuf->seqnum++,
prefix_peer(p));
diff --git a/usr.sbin/bgpd/parse.y b/usr.sbin/bgpd/parse.y
index eeb041bd156..7a0ef65021c 100644
--- a/usr.sbin/bgpd/parse.y
+++ b/usr.sbin/bgpd/parse.y
@@ -1,4 +1,4 @@
-/* $OpenBSD: parse.y,v 1.392 2019/06/22 05:36:40 claudio Exp $ */
+/* $OpenBSD: parse.y,v 1.393 2019/07/17 10:13:26 claudio Exp $ */
/*
* Copyright (c) 2002, 2003, 2004 Henning Brauer <henning@openbsd.org>
@@ -3282,8 +3282,6 @@ parse_config(char *filename, struct peer_head *ph)
add_rib("Adj-RIB-In", conf->default_tableid,
F_RIB_NOFIB | F_RIB_NOEVALUATE);
- add_rib("Adj-RIB-Out", conf->default_tableid,
- F_RIB_NOFIB | F_RIB_NOEVALUATE);
add_rib("Loc-RIB", conf->default_tableid, F_RIB_LOCAL);
if ((file = pushfile(filename, 1)) == NULL)
diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c
index b09646f46e5..700ca7a5fe1 100644
--- a/usr.sbin/bgpd/rde.c
+++ b/usr.sbin/bgpd/rde.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: rde.c,v 1.475 2019/07/01 07:07:08 claudio Exp $ */
+/* $OpenBSD: rde.c,v 1.476 2019/07/17 10:13:26 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
@@ -94,8 +94,8 @@ u_int8_t rde_roa_validity(struct rde_prefixset *,
void peer_init(u_int32_t);
void peer_shutdown(void);
int peer_localaddrs(struct rde_peer *, struct bgpd_addr *);
+struct rde_peer *peer_match(struct ctl_neighbor *, u_int32_t);
struct rde_peer *peer_add(u_int32_t, struct peer_config *);
-struct rde_peer *peer_get(u_int32_t);
void peer_up(u_int32_t, struct session_up *);
void peer_down(u_int32_t);
void peer_flush(struct rde_peer *, u_int8_t, time_t);
@@ -133,7 +133,7 @@ int softreconfig;
struct rde_dump_ctx {
LIST_ENTRY(rde_dump_ctx) entry;
struct ctl_show_rib_request req;
- u_int16_t rid;
+ u_int32_t peerid;
u_int8_t throttled;
};
@@ -220,7 +220,6 @@ rde_main(int debug, int verbose)
/* make sure the default RIBs are setup */
rib_new("Adj-RIB-In", 0, F_RIB_NOFIB | F_RIB_NOEVALUATE);
- rib_new("Adj-RIB-Out", 0, F_RIB_NOFIB | F_RIB_NOEVALUATE);
out_rules = calloc(1, sizeof(struct filter_head));
if (out_rules == NULL)
@@ -2242,7 +2241,7 @@ rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags)
rib.origin = asp->origin;
rib.validation_state = p->validation_state;
rib.flags = 0;
- if (p->re->active == p)
+ if (p->re != NULL && p->re->active == p)
rib.flags |= F_PREF_ACTIVE;
if (!prefix_peer(p)->conf.ebgp)
rib.flags |= F_PREF_INTERNAL;
@@ -2305,7 +2304,7 @@ rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags)
}
static int
-rde_dump_match_peer(struct rde_peer *p, struct ctl_neighbor *n)
+rde_match_peer(struct rde_peer *p, struct ctl_neighbor *n)
{
char *s;
@@ -2326,7 +2325,7 @@ rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req)
{
struct rde_aspath *asp;
- if (!rde_dump_match_peer(prefix_peer(p), &req->neighbor))
+ if (!rde_match_peer(prefix_peer(p), &req->neighbor))
return;
asp = prefix_aspath(p);
@@ -2353,10 +2352,10 @@ rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req)
static void
rde_dump_upcall(struct rib_entry *re, void *ptr)
{
- struct prefix *p;
struct rde_dump_ctx *ctx = ptr;
+ struct prefix *p;
- LIST_FOREACH(p, &re->prefix_h, rib_l)
+ LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
rde_dump_filter(p, &ctx->req);
}
@@ -2375,10 +2374,38 @@ rde_dump_prefix_upcall(struct rib_entry *re, void *ptr)
if (ctx->req.prefixlen > pt->prefixlen)
return;
if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen))
- LIST_FOREACH(p, &re->prefix_h, rib_l)
+ LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
rde_dump_filter(p, &ctx->req);
}
+static void
+rde_dump_adjout_upcall(struct prefix *p, void *ptr)
+{
+ struct rde_dump_ctx *ctx = ptr;
+
+ if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
+ return;
+ rde_dump_filter(p, &ctx->req);
+}
+
+static void
+rde_dump_adjout_prefix_upcall(struct prefix *p, void *ptr)
+{
+ struct rde_dump_ctx *ctx = ptr;
+ struct bgpd_addr addr;
+
+ if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
+ return;
+
+ pt_getaddr(p->pt, &addr);
+ if (addr.aid != ctx->req.prefix.aid)
+ return;
+ if (ctx->req.prefixlen > p->pt->prefixlen)
+ return;
+ if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen))
+ rde_dump_filter(p, &ctx->req);
+}
+
static int
rde_dump_throttled(void *arg)
{
@@ -2391,11 +2418,45 @@ static void
rde_dump_done(void *arg, u_int8_t aid)
{
struct rde_dump_ctx *ctx = arg;
+ struct rde_peer *peer;
+ u_int error;
- imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid,
- -1, NULL, 0);
+ if (ctx->req.flags & F_CTL_ADJ_OUT) {
+ peer = peer_match(&ctx->req.neighbor, ctx->peerid);
+ if (peer == NULL)
+ goto done;
+ ctx->peerid = peer->conf.id;
+ switch (ctx->req.type) {
+ case IMSG_CTL_SHOW_RIB:
+ if (prefix_dump_new(peer, ctx->req.aid,
+ CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall,
+ rde_dump_done, rde_dump_throttled) == -1)
+ goto nomem;
+ break;
+ case IMSG_CTL_SHOW_RIB_PREFIX:
+ if (prefix_dump_new(peer, ctx->req.aid,
+ CTL_MSG_HIGH_MARK, ctx,
+ rde_dump_adjout_prefix_upcall,
+ rde_dump_done, rde_dump_throttled) == -1)
+ goto nomem;
+ break;
+ default:
+ fatalx("%s: unsupported imsg type", __func__);
+ }
+ return;
+ }
+done:
+ imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, -1, NULL, 0);
LIST_REMOVE(ctx, entry);
free(ctx);
+ return;
+
+nomem:
+ log_warn(__func__);
+ error = CTL_RES_NOMEM;
+ imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, ctx->req.pid, -1, &error,
+ sizeof(error));
+ return;
}
void
@@ -2404,24 +2465,92 @@ rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid,
{
struct rde_dump_ctx *ctx;
struct rib_entry *re;
+ struct prefix *p;
u_int error;
u_int8_t hostplen;
u_int16_t rid;
if ((ctx = calloc(1, sizeof(*ctx))) == NULL) {
nomem:
- log_warn("rde_dump_ctx_new");
+ log_warn(__func__);
error = CTL_RES_NOMEM;
imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
sizeof(error));
return;
}
+
+ memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request));
+ ctx->req.pid = pid;
+ ctx->req.type = type;
+
if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) {
rid = RIB_ADJ_IN;
} else if (req->flags & F_CTL_ADJ_OUT) {
- rid = RIB_ADJ_OUT;
+ struct rde_peer *peer;
+
+ peer = peer_match(&req->neighbor, 0);
+ if (peer == NULL) {
+ log_warnx("%s: no peer found for adj-rib-out",
+ __func__);
+ error = CTL_RES_NOSUCHPEER;
+ imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1,
+ &error, sizeof(error));
+ free(ctx);
+ return;
+ }
+ ctx->peerid = peer->conf.id;
+ switch (ctx->req.type) {
+ case IMSG_CTL_SHOW_RIB:
+ if (prefix_dump_new(peer, ctx->req.aid,
+ CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall,
+ rde_dump_done, rde_dump_throttled) == -1)
+ goto nomem;
+ break;
+ case IMSG_CTL_SHOW_RIB_PREFIX:
+ if (req->flags & F_LONGER) {
+ if (prefix_dump_new(peer, ctx->req.aid,
+ CTL_MSG_HIGH_MARK, ctx,
+ rde_dump_adjout_prefix_upcall,
+ rde_dump_done, rde_dump_throttled) == -1)
+ goto nomem;
+ break;
+ }
+ switch (req->prefix.aid) {
+ case AID_INET:
+ case AID_VPN_IPv4:
+ hostplen = 32;
+ break;
+ case AID_INET6:
+ case AID_VPN_IPv6:
+ hostplen = 128;
+ break;
+ default:
+ fatalx("%s: unknown af", __func__);
+ }
+
+ do {
+ if (req->prefixlen == hostplen)
+ p = prefix_match(peer, &req->prefix);
+ else
+ p = prefix_lookup(peer, &req->prefix,
+ req->prefixlen);
+ if (p)
+ rde_dump_adjout_upcall(p, ctx);
+ } while ((peer = peer_match(&req->neighbor,
+ peer->conf.id)));
+
+ imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid,
+ -1, NULL, 0);
+ free(ctx);
+ return;
+ default:
+ fatalx("%s: unsupported imsg type", __func__);
+ }
+
+ LIST_INSERT_HEAD(&rde_dump_h, ctx, entry);
+ return;
} else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) {
- log_warnx("rde_dump_ctx_new: no such rib %s", req->rib);
+ log_warnx("%s: no such rib %s", __func__, req->rib);
error = CTL_RES_NOSUCHRIB;
imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
sizeof(error));
@@ -2429,10 +2558,6 @@ rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid,
return;
}
- memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request));
- ctx->req.pid = pid;
- ctx->req.type = type;
- ctx->rid = rid;
switch (ctx->req.type) {
case IMSG_CTL_SHOW_NETWORK:
if (rib_dump_new(rid, ctx->req.aid, CTL_MSG_HIGH_MARK, ctx,
@@ -2463,10 +2588,10 @@ rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid,
hostplen = 128;
break;
default:
- fatalx("rde_dump_ctx_new: unknown af");
+ fatalx("%s: unknown af", __func__);
}
if (req->prefixlen == hostplen)
- re = rib_lookup(rib_byid(rid), &req->prefix);
+ re = rib_match(rib_byid(rid), &req->prefix);
else
re = rib_get(rib_byid(rid), &req->prefix,
req->prefixlen);
@@ -2502,21 +2627,7 @@ rde_dump_ctx_terminate(pid_t pid)
LIST_FOREACH(ctx, &rde_dump_h, entry) {
if (ctx->req.pid == pid) {
- void (*upcall)(struct rib_entry *, void *);
- switch (ctx->req.type) {
- case IMSG_CTL_SHOW_NETWORK:
- upcall = network_dump_upcall;
- break;
- case IMSG_CTL_SHOW_RIB:
- upcall = rde_dump_upcall;
- break;
- case IMSG_CTL_SHOW_RIB_PREFIX:
- upcall = rde_dump_prefix_upcall;
- break;
- default:
- fatalx("%s: unsupported imsg type", __func__);
- }
- rib_dump_terminate(ctx->rid, ctx, upcall);
+ rib_dump_terminate(ctx);
return;
}
}
@@ -2697,16 +2808,9 @@ rde_up_dump_upcall(struct rib_entry *re, void *ptr)
}
static void
-rde_up_flush_upcall(struct rib_entry *re, void *ptr)
+rde_up_flush_upcall(struct prefix *p, void *ptr)
{
- struct rde_peer *peer = ptr;
- struct prefix *p, *np;
-
- LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
- if (peer != prefix_peer(p))
- continue;
- up_generate_updates(out_rules, peer, NULL, p);
- }
+ up_generate_updates(out_rules, prefix_peer(p), NULL, p);
}
static void
@@ -3011,18 +3115,16 @@ rde_reload_done(void)
peer->reconf_out = 0;
peer->reconf_rib = 0;
if (peer->loc_rib_id != rib_find(peer->conf.rib)) {
- char *p = log_fmt_peer(&peer->conf);
- log_debug("rib change: reloading peer %s", p);
- free(p);
+ log_peer_info(&peer->conf, "rib change, reloading");
peer->loc_rib_id = rib_find(peer->conf.rib);
if (peer->loc_rib_id == RIB_NOTFOUND)
fatalx("King Bula's peer met an unknown RIB");
peer->reconf_rib = 1;
softreconfig++;
- if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC,
- RDE_RUNNER_ROUNDS, peer, rde_up_flush_upcall,
+ if (prefix_dump_new(peer, AID_UNSPEC,
+ RDE_RUNNER_ROUNDS, NULL, rde_up_flush_upcall,
rde_softreconfig_in_done, NULL) == -1)
- fatal("%s: rib_dump_new", __func__);
+ fatal("%s: prefix_dump_new", __func__);
log_peer_info(&peer->conf, "flushing Adj-RIB-Out");
continue;
}
@@ -3197,7 +3299,7 @@ rde_softreconfig_in(struct rib_entry *re, void *bula)
pt = re->prefix;
pt_getaddr(pt, &prefix);
- LIST_FOREACH(p, &re->prefix_h, rib_l) {
+ LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
asp = prefix_aspath(p);
peer = prefix_peer(p);
force_eval = 0;
@@ -3358,6 +3460,35 @@ peer_get(u_int32_t id)
}
struct rde_peer *
+peer_match(struct ctl_neighbor *n, u_int32_t peerid)
+{
+ struct rde_peer_head *head;
+ struct rde_peer *peer;
+ u_int32_t i = 0;
+
+ if (peerid != 0)
+ i = peerid & peertable.peer_hashmask;
+
+ while (i <= peertable.peer_hashmask) {
+ head = &peertable.peer_hashtbl[i];
+ LIST_FOREACH(peer, head, hash_l) {
+ /* skip peers until peerid is found */
+ if (peerid == peer->conf.id) {
+ peerid = 0;
+ continue;
+ }
+ if (peerid != 0)
+ continue;
+
+ if (rde_match_peer(peer, n))
+ return (peer);
+ }
+ i++;
+ }
+ return (NULL);
+}
+
+struct rde_peer *
peer_add(u_int32_t id, struct peer_config *p_conf)
{
struct rde_peer_head *head;
@@ -3441,17 +3572,9 @@ peer_localaddrs(struct rde_peer *peer, struct bgpd_addr *laddr)
}
static void
-peer_adjout_flush_upcall(struct rib_entry *re, void *arg)
+peer_adjout_clear_upcall(struct prefix *p, void *arg)
{
- struct rde_peer *peer = arg;
- struct prefix *p, *np;
-
- LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
- if (peer != prefix_peer(p))
- continue;
- prefix_destroy(p);
- break; /* optimization, only one match per peer possible */
- }
+ prefix_adjout_destroy(p);
}
void
@@ -3472,9 +3595,9 @@ peer_up(u_int32_t id, struct session_up *sup)
* There is a race condition when doing PEER_ERR -> PEER_DOWN.
* So just do a full reset of the peer here.
*/
- if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC, 0, peer,
- peer_adjout_flush_upcall, NULL, NULL) == -1)
- fatal("%s: rib_dump_new", __func__);
+ if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
+ peer_adjout_clear_upcall, NULL, NULL) == -1)
+ fatal("%s: prefix_dump_new", __func__);
peer_flush(peer, AID_UNSPEC, 0);
peer->prefix_cnt = 0;
peer->state = PEER_DOWN;
@@ -3519,12 +3642,12 @@ peer_down(u_int32_t id)
peer->remote_bgpid = 0;
peer->state = PEER_DOWN;
/* stop all pending dumps which may depend on this peer */
- rib_dump_terminate(peer->loc_rib_id, peer, rde_up_dump_upcall);
+ rib_dump_terminate(peer);
/* flush Adj-RIB-Out for this peer */
- if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC, 0, peer,
- peer_adjout_flush_upcall, NULL, NULL) == -1)
- fatal("%s: rib_dump_new", __func__);
+ if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
+ peer_adjout_clear_upcall, NULL, NULL) == -1)
+ fatal("%s: prefix_dump_new", __func__);
peer_flush(peer, AID_UNSPEC, 0);
@@ -3553,7 +3676,7 @@ peer_flush_upcall(struct rib_entry *re, void *arg)
pt_getaddr(re->prefix, &addr);
prefixlen = re->prefix->prefixlen;
- LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
+ LIST_FOREACH_SAFE(p, &re->prefix_h, entry.list.rib, np) {
if (peer != prefix_peer(p))
continue;
if (staletime && p->lastchange > staletime)
@@ -3888,7 +4011,7 @@ network_dump_upcall(struct rib_entry *re, void *ptr)
struct bgpd_addr addr;
struct rde_dump_ctx *ctx = ptr;
- LIST_FOREACH(p, &re->prefix_h, rib_l) {
+ LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
asp = prefix_aspath(p);
if (!(asp->flags & F_PREFIX_ANNOUNCED))
continue;
@@ -3925,7 +4048,7 @@ network_flush_upcall(struct rib_entry *re, void *ptr)
pt_getaddr(re->prefix, &addr);
prefixlen = re->prefix->prefixlen;
- LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
+ LIST_FOREACH_SAFE(p, &re->prefix_h, entry.list.rib, np) {
if (prefix_peer(p) != peer)
continue;
asp = prefix_aspath(p);
@@ -3966,9 +4089,6 @@ rde_shutdown(void)
while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL)
peer_down(p->conf.id);
- /* then since decision process is off, kill RIB_ADJ_OUT */
- rib_free(rib_byid(RIB_ADJ_OUT));
-
/* free filters */
filterlist_free(out_rules);
filterlist_free(out_rules_tmp);
diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h
index 9d0f4978a17..a0ff7842b53 100644
--- a/usr.sbin/bgpd/rde.h
+++ b/usr.sbin/bgpd/rde.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: rde.h,v 1.219 2019/07/01 07:07:08 claudio Exp $ */
+/* $OpenBSD: rde.h,v 1.220 2019/07/17 10:13:26 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> and
@@ -57,8 +57,7 @@ struct rib {
};
#define RIB_ADJ_IN 0
-#define RIB_ADJ_OUT 1
-#define RIB_LOC_START 2
+#define RIB_LOC_START 1
#define RIB_NOTFOUND 0xffff
struct rib_desc {
@@ -78,6 +77,7 @@ LIST_HEAD(aspath_list, aspath);
LIST_HEAD(attr_list, attr);
LIST_HEAD(aspath_head, rde_aspath);
RB_HEAD(prefix_tree, prefix);
+RB_HEAD(prefix_index, prefix);
struct rde_peer {
LIST_ENTRY(rde_peer) hash_l; /* hash list over all peers */
@@ -87,6 +87,7 @@ struct rde_peer {
struct bgpd_addr local_v4_addr;
struct bgpd_addr local_v6_addr;
struct capabilities capa;
+ struct prefix_index adj_rib_out;
struct prefix_tree updates[AID_MAX];
struct prefix_tree withdraws[AID_MAX];
time_t staletime[AID_MAX];
@@ -306,8 +307,14 @@ struct pt_entry_vpn6 {
};
struct prefix {
- LIST_ENTRY(prefix) rib_l, nexthop_l;
- RB_ENTRY(prefix) entry;
+ union {
+ struct {
+ LIST_ENTRY(prefix) rib, nexthop;
+ } list;
+ struct {
+ RB_ENTRY(prefix) index, update;
+ } tree;
+ } entry;
struct pt_entry *pt;
struct rib_entry *re;
struct rde_aspath *aspath;
@@ -317,12 +324,17 @@ struct prefix {
time_t lastchange;
u_int8_t validation_state;
u_int8_t nhflags;
- u_int8_t flags;
u_int8_t eor;
-#define PREFIX_FLAG_WITHDRAW 0x01
-#define PREFIX_FLAG_UPDATE 0x02
+ u_int8_t flags;
+#define PREFIX_FLAG_WITHDRAW 0x01 /* queued for withdraw */
+#define PREFIX_FLAG_UPDATE 0x02 /* queued for update */
+#define PREFIX_FLAG_DEAD 0x04 /* locked but removed */
+#define PREFIX_FLAG_MASK 0x07 /* mask for the three prefix types */
+#define PREFIX_NEXTHOP_LINKED 0x40 /* prefix is linked onto nexthop list */
+#define PREFIX_FLAG_LOCKED 0x80 /* locked by rib walker */
};
+/* possible states for nhflags */
#define NEXTHOP_SELF 0x01
#define NEXTHOP_REJECT 0x02
#define NEXTHOP_BLACKHOLE 0x04
@@ -356,6 +368,7 @@ u_int32_t rde_local_as(void);
int rde_noevaluate(void);
int rde_decisionflags(void);
int rde_as4byte(struct rde_peer *);
+struct rde_peer *peer_get(u_int32_t);
/* rde_attr.c */
int attr_write(void *, u_int16_t, u_int8_t, u_int8_t, void *,
@@ -395,6 +408,7 @@ u_char *aspath_override(struct aspath *, u_int32_t, u_int32_t,
u_int16_t *);
int aspath_lenmatch(struct aspath *, enum aslen_spec, u_int);
+/* rde_community.c */
int community_match(struct rde_community *, struct community *,
struct rde_peer *);
int community_set(struct rde_community *, struct community *,
@@ -499,15 +513,14 @@ struct rib_desc *rib_desc(struct rib *);
void rib_free(struct rib *);
void rib_shutdown(void);
struct rib_entry *rib_get(struct rib *, struct bgpd_addr *, int);
-struct rib_entry *rib_lookup(struct rib *, struct bgpd_addr *);
+struct rib_entry *rib_match(struct rib *, struct bgpd_addr *);
int rib_dump_pending(void);
void rib_dump_runner(void);
int rib_dump_new(u_int16_t, u_int8_t, unsigned int, void *,
void (*)(struct rib_entry *, void *),
void (*)(void *, u_int8_t),
int (*)(void *));
-void rib_dump_terminate(u_int16_t, void *,
- void (*)(struct rib_entry *, void *));
+void rib_dump_terminate(void *);
static inline struct rib *
re_rib(struct rib_entry *re)
@@ -540,13 +553,20 @@ void path_put(struct rde_aspath *);
#define PREFIX_SIZE(x) (((x) + 7) / 8 + 1)
struct prefix *prefix_get(struct rib *, struct rde_peer *,
struct bgpd_addr *, int);
+struct prefix *prefix_lookup(struct rde_peer *, struct bgpd_addr *, int);
+struct prefix *prefix_match(struct rde_peer *, struct bgpd_addr *);
int prefix_remove(struct rib *, struct rde_peer *,
struct bgpd_addr *, int);
void prefix_add_eor(struct rde_peer *, u_int8_t);
-void prefix_update(struct rib *, struct rde_peer *,
- struct bgpd_addr *, int);
-int prefix_withdraw(struct rib *, struct rde_peer *,
- struct bgpd_addr *, int);
+int prefix_update(struct rde_peer *, struct filterstate *,
+ struct bgpd_addr *, int, u_int8_t);
+int prefix_withdraw(struct rde_peer *, struct bgpd_addr *, int);
+void prefix_adjout_destroy(struct prefix *p);
+void prefix_adjout_dump(struct rde_peer *, void *,
+ void (*)(struct prefix *, void *));
+int prefix_dump_new(struct rde_peer *, u_int8_t, unsigned int,
+ void *, void (*)(struct prefix *, void *),
+ void (*)(void *, u_int8_t), int (*)(void *));
int prefix_write(u_char *, int, struct bgpd_addr *, u_int8_t, int);
int prefix_writebuf(struct ibuf *, struct bgpd_addr *, u_int8_t);
struct prefix *prefix_bypeer(struct rib_entry *, struct rde_peer *);
diff --git a/usr.sbin/bgpd/rde_decide.c b/usr.sbin/bgpd/rde_decide.c
index 47110cf1880..e2d2327b1a6 100644
--- a/usr.sbin/bgpd/rde_decide.c
+++ b/usr.sbin/bgpd/rde_decide.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: rde_decide.c,v 1.74 2019/01/21 02:07:56 claudio Exp $ */
+/* $OpenBSD: rde_decide.c,v 1.75 2019/07/17 10:13:26 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org>
@@ -245,7 +245,7 @@ prefix_evaluate(struct prefix *p, struct rib_entry *re)
if (re_rib(re)->flags & F_RIB_NOEVALUATE || rde_noevaluate()) {
/* decision process is turned off */
if (p != NULL)
- LIST_INSERT_HEAD(&re->prefix_h, p, rib_l);
+ LIST_INSERT_HEAD(&re->prefix_h, p, entry.list.rib);
if (re->active != NULL)
re->active = NULL;
return;
@@ -253,15 +253,18 @@ prefix_evaluate(struct prefix *p, struct rib_entry *re)
if (p != NULL) {
if (LIST_EMPTY(&re->prefix_h))
- LIST_INSERT_HEAD(&re->prefix_h, p, rib_l);
+ LIST_INSERT_HEAD(&re->prefix_h, p, entry.list.rib);
else {
- LIST_FOREACH(xp, &re->prefix_h, rib_l) {
+ LIST_FOREACH(xp, &re->prefix_h, entry.list.rib) {
if (prefix_cmp(p, xp) > 0) {
- LIST_INSERT_BEFORE(xp, p, rib_l);
+ LIST_INSERT_BEFORE(xp, p,
+ entry.list.rib);
break;
- } else if (LIST_NEXT(xp, rib_l) == NULL) {
+ } else if (LIST_NEXT(xp, entry.list.rib) ==
+ NULL) {
/* if xp last element ... */
- LIST_INSERT_AFTER(xp, p, rib_l);
+ LIST_INSERT_AFTER(xp, p,
+ entry.list.rib);
break;
}
}
diff --git a/usr.sbin/bgpd/rde_rib.c b/usr.sbin/bgpd/rde_rib.c
index 5e3e3221746..3b57f2d3fd4 100644
--- a/usr.sbin/bgpd/rde_rib.c
+++ b/usr.sbin/bgpd/rde_rib.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: rde_rib.c,v 1.198 2019/07/01 14:47:56 claudio Exp $ */
+/* $OpenBSD: rde_rib.c,v 1.199 2019/07/17 10:13:26 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org>
@@ -52,8 +52,10 @@ RB_GENERATE(rib_tree, rib_entry, rib_e, rib_compare);
struct rib_context {
LIST_ENTRY(rib_context) entry;
struct rib_entry *ctx_re;
- u_int16_t ctx_rib_id;
- void (*ctx_upcall)(struct rib_entry *, void *);
+ struct prefix *ctx_p;
+ u_int32_t ctx_id;
+ void (*ctx_rib_call)(struct rib_entry *, void *);
+ void (*ctx_prefix_call)(struct prefix *, void *);
void (*ctx_done)(void *, u_int8_t);
int (*ctx_throttle)(void *);
void *ctx_arg;
@@ -69,6 +71,7 @@ static int prefix_add(struct bgpd_addr *, int, struct rib *,
static int prefix_move(struct prefix *, struct rde_peer *,
struct rde_aspath *, struct rde_community *,
struct nexthop *, u_int8_t, u_int8_t);
+static void prefix_dump_r(struct rib_context *);
static inline struct rib_entry *
re_lock(struct rib_entry *re)
@@ -128,7 +131,7 @@ rib_new(char *name, u_int rtableid, u_int16_t flags)
rib_size = id + 1;
}
- bzero(&ribs[id], sizeof(struct rib_desc));
+ memset(&ribs[id], 0, sizeof(struct rib_desc));
strlcpy(ribs[id].name, name, sizeof(ribs[id].name));
RB_INIT(rib_tree(&ribs[id].rib));
ribs[id].state = RECONF_REINIT;
@@ -196,7 +199,7 @@ rib_free(struct rib *rib)
*/
while ((p = LIST_FIRST(&re->prefix_h))) {
struct rde_aspath *asp = prefix_aspath(p);
- np = LIST_NEXT(p, rib_l);
+ np = LIST_NEXT(p, entry.list.rib);
if (asp && asp->pftableid) {
struct bgpd_addr addr;
@@ -215,7 +218,7 @@ rib_free(struct rib *rib)
rd = &ribs[rib->id];
filterlist_free(rd->in_rules_tmp);
filterlist_free(rd->in_rules);
- bzero(rd, sizeof(struct rib_desc));
+ memset(rd, 0, sizeof(struct rib_desc));
}
void
@@ -235,7 +238,7 @@ rib_shutdown(void)
struct rib_desc *rd = &ribs[id];
filterlist_free(rd->in_rules_tmp);
filterlist_free(rd->in_rules);
- bzero(rd, sizeof(struct rib_desc));
+ memset(rd, 0, sizeof(struct rib_desc));
}
free(ribs);
}
@@ -247,7 +250,7 @@ rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
struct pt_entry *pte;
pte = pt_fill(prefix, prefixlen);
- bzero(&xre, sizeof(xre));
+ memset(&xre, 0, sizeof(xre));
xre.prefix = pte;
re = RB_FIND(rib_tree, rib_tree(rib), &xre);
@@ -258,7 +261,7 @@ rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
}
struct rib_entry *
-rib_lookup(struct rib *rib, struct bgpd_addr *addr)
+rib_match(struct rib *rib, struct bgpd_addr *addr)
{
struct rib_entry *re;
int i;
@@ -281,7 +284,7 @@ rib_lookup(struct rib *rib, struct bgpd_addr *addr)
}
break;
default:
- fatalx("rib_lookup: unknown af");
+ fatalx("%s: unknown af", __func__);
}
return (NULL);
}
@@ -367,9 +370,9 @@ rib_dump_r(struct rib_context *ctx)
struct rib *rib;
unsigned int i;
- rib = rib_byid(ctx->ctx_rib_id);
+ rib = rib_byid(ctx->ctx_id);
if (rib == NULL)
- fatalx("%s: rib id %u gone", __func__, ctx->ctx_rib_id);
+ fatalx("%s: rib id %u gone", __func__, ctx->ctx_id);
if (ctx->ctx_re == NULL)
re = RB_MIN(rib_tree, rib_tree(rib));
@@ -378,9 +381,9 @@ rib_dump_r(struct rib_context *ctx)
for (i = 0; re != NULL; re = next) {
next = RB_NEXT(rib_tree, unused, re);
- if (re->rib_id != ctx->ctx_rib_id)
+ if (re->rib_id != ctx->ctx_id)
fatalx("%s: Unexpected RIB %u != %u.", __func__,
- re->rib_id, ctx->ctx_rib_id);
+ re->rib_id, ctx->ctx_id);
if (ctx->ctx_aid != AID_UNSPEC &&
ctx->ctx_aid != re->prefix->aid)
continue;
@@ -391,7 +394,7 @@ rib_dump_r(struct rib_context *ctx)
re_lock(re);
return;
}
- ctx->ctx_upcall(re, ctx->ctx_arg);
+ ctx->ctx_rib_call(re, ctx->ctx_arg);
}
if (ctx->ctx_done)
@@ -422,7 +425,10 @@ rib_dump_runner(void)
LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
if (ctx->ctx_throttle && ctx->ctx_throttle(ctx->ctx_arg))
continue;
- rib_dump_r(ctx);
+ if (ctx->ctx_rib_call != NULL)
+ rib_dump_r(ctx);
+ else
+ prefix_dump_r(ctx);
}
}
@@ -432,7 +438,7 @@ rib_dump_abort(u_int16_t id)
struct rib_context *ctx, *next;
LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
- if (id != ctx->ctx_rib_id)
+ if (id != ctx->ctx_id)
continue;
if (ctx->ctx_done)
ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
@@ -450,11 +456,11 @@ rib_dump_new(u_int16_t id, u_int8_t aid, unsigned int count, void *arg,
if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
return -1;
- ctx->ctx_rib_id = id;
+ ctx->ctx_id = id;
ctx->ctx_aid = aid;
ctx->ctx_count = count;
ctx->ctx_arg = arg;
- ctx->ctx_upcall = upcall;
+ ctx->ctx_rib_call = upcall;
ctx->ctx_done = done;
ctx->ctx_throttle = throttle;
@@ -468,14 +474,12 @@ rib_dump_new(u_int16_t id, u_int8_t aid, unsigned int count, void *arg,
}
void
-rib_dump_terminate(u_int16_t id, void *arg,
- void (*upcall)(struct rib_entry *, void *))
+rib_dump_terminate(void *arg)
{
struct rib_context *ctx, *next;
LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
- if (id != ctx->ctx_rib_id || ctx->ctx_arg != arg ||
- ctx->ctx_upcall != upcall)
+ if (ctx->ctx_arg != arg)
continue;
if (ctx->ctx_done)
ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
@@ -610,15 +614,6 @@ path_update(struct rib *rib, struct rde_peer *peer, struct filterstate *state,
p->validation_state = vstate;
return (2);
}
- if (p->flags) {
- struct prefix_tree *prefix_head;
- /* prefix is a pending update */
- prefix_head = p->flags & PREFIX_FLAG_UPDATE ?
- &peer->updates[prefix->aid] :
- &peer->withdraws[prefix->aid];
- RB_REMOVE(prefix_tree, prefix_head, p);
- p->flags = 0;
- }
}
/*
@@ -881,7 +876,14 @@ prefix_cmp(struct prefix *a, struct prefix *b)
return pt_prefix_cmp(a->pt, b->pt);
}
-RB_GENERATE(prefix_tree, prefix, entry, prefix_cmp)
+static inline int
+prefix_index_cmp(struct prefix *a, struct prefix *b)
+{
+ return pt_prefix_cmp(a->pt, b->pt);
+}
+
+RB_GENERATE(prefix_tree, prefix, entry.tree.update, prefix_cmp)
+RB_GENERATE_STATIC(prefix_index, prefix, entry.tree.index, prefix_index_cmp)
/*
* search for specified prefix of a peer. Returns NULL if not found.
@@ -899,6 +901,52 @@ prefix_get(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix,
}
/*
+ * lookup prefix in the peer prefix_index. Returns NULL if not found.
+ */
+struct prefix *
+prefix_lookup(struct rde_peer *peer, struct bgpd_addr *prefix,
+ int prefixlen)
+{
+ struct prefix xp;
+ struct pt_entry *pte;
+
+ memset(&xp, 0, sizeof(xp));
+ pte = pt_fill(prefix, prefixlen);
+ xp.pt = pte;
+
+ return RB_FIND(prefix_index, &peer->adj_rib_out, &xp);
+}
+
+struct prefix *
+prefix_match(struct rde_peer *peer, struct bgpd_addr *addr)
+{
+ struct prefix *p;
+ int i;
+
+ switch (addr->aid) {
+ case AID_INET:
+ case AID_VPN_IPv4:
+ for (i = 32; i >= 0; i--) {
+ p = prefix_lookup(peer, addr, i);
+ if (p != NULL)
+ return p;
+ }
+ break;
+ case AID_INET6:
+ case AID_VPN_IPv6:
+ for (i = 128; i >= 0; i--) {
+ p = prefix_lookup(peer, addr, i);
+ if (p != NULL)
+ return p;
+ }
+ break;
+ default:
+ fatalx("%s: unknown af", __func__);
+ }
+ return NULL;
+}
+
+/*
* Adds or updates a prefix.
*/
static int
@@ -936,8 +984,8 @@ prefix_move(struct prefix *p, struct rde_peer *peer,
np->aspath = path_ref(asp);
np->communities = communities_ref(comm);
np->peer = peer;
- np->pt = p->pt; /* skip refcnt update since ref is moved */
np->re = p->re;
+ np->pt = p->pt; /* skip refcnt update since ref is moved */
np->validation_state = vstate;
np->nhflags = nhflags;
np->nexthop = nexthop_ref(nexthop);
@@ -957,7 +1005,7 @@ prefix_move(struct prefix *p, struct rde_peer *peer,
* This is safe because we create a new prefix and so the change
* is noticed by prefix_evaluate().
*/
- LIST_REMOVE(p, rib_l);
+ LIST_REMOVE(p, entry.list.rib);
prefix_evaluate(np, np->re);
/* remove old prefix node */
@@ -1020,26 +1068,97 @@ prefix_add_eor(struct rde_peer *peer, u_int8_t aid)
if (RB_INSERT(prefix_tree, &peer->updates[aid], p) != NULL)
/* no need to add if EoR marker already present */
prefix_free(p);
+ /* EOR marker is not inserted into the adj_rib_out index */
}
/*
* Put a prefix from the Adj-RIB-Out onto the update queue.
*/
-void
-prefix_update(struct rib *rib, struct rde_peer *peer,
- struct bgpd_addr *prefix, int prefixlen)
+int
+prefix_update(struct rde_peer *peer, struct filterstate *state,
+ struct bgpd_addr *prefix, int prefixlen, u_int8_t vstate)
{
+ struct prefix_tree *prefix_head = NULL;
+ struct rde_aspath *asp;
+ struct rde_community *comm;
struct prefix *p;
+ int created = 0;
+
+ if ((p = prefix_lookup(peer, prefix, prefixlen)) != NULL) {
+ /* prefix is already in the Adj-RIB-Out */
+ if (p->flags & PREFIX_FLAG_WITHDRAW) {
+ created = 1; /* consider this a new entry */
+ peer->up_wcnt--;
+ prefix_head = &peer->withdraws[prefix->aid];
+ RB_REMOVE(prefix_tree, prefix_head, p);
+ } else if (p->flags & PREFIX_FLAG_DEAD) {
+ created = 1; /* consider this a new entry */
+ } else {
+ if (prefix_nhflags(p) == state->nhflags &&
+ prefix_nexthop(p) == state->nexthop &&
+ communities_equal(&state->communities,
+ prefix_communities(p)) &&
+ path_compare(&state->aspath, prefix_aspath(p)) ==
+ 0) {
+ /* nothing changed */
+ p->validation_state = vstate;
+ p->lastchange = time(NULL);
+ return 0;
+ }
- p = prefix_get(rib, peer, prefix, prefixlen);
- if (p == NULL) /* Got a dummy withdrawn request. */
- return;
+ if (p->flags & PREFIX_FLAG_UPDATE) {
+ /* created = 0 so up_nlricnt is not increased */
+ prefix_head = &peer->updates[prefix->aid];
+ RB_REMOVE(prefix_tree, prefix_head, p);
+ }
+ }
+ /* unlink from aspath and remove nexthop ref */
+ nexthop_unref(p->nexthop);
+ communities_unref(p->communities);
+ path_unref(p->aspath);
+ p->flags &= ~PREFIX_FLAG_MASK;
+
+ /* peer and pt remain */
+ } else {
+ p = prefix_alloc();
+ created = 1;
+
+ p->pt = pt_get(prefix, prefixlen);
+ if (p->pt == NULL)
+ fatalx("%s: update for non existing prefix", __func__);
+ pt_ref(p->pt);
+ p->peer = peer;
+
+ if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL)
+ fatalx("%s: RB index invariant violated", __func__);
+ }
- if (p->flags != 0)
+ if ((asp = path_lookup(&state->aspath)) == NULL) {
+ /* Path not available, create and link a new one. */
+ asp = path_copy(path_get(), &state->aspath);
+ path_link(asp);
+ }
+
+ if ((comm = communities_lookup(&state->communities)) == NULL) {
+ /* Communities not available, create and link a new one. */
+ comm = communities_link(&state->communities);
+ }
+
+ p->aspath = path_ref(asp);
+ p->communities = communities_ref(comm);
+ p->nexthop = nexthop_ref(state->nexthop);
+ p->nhflags = state->nhflags;
+
+ p->validation_state = vstate;
+ p->lastchange = time(NULL);
+
+ if (p->flags & PREFIX_FLAG_MASK)
fatalx("%s: bad flags %x", __func__, p->flags);
- p->flags = PREFIX_FLAG_UPDATE;
+ p->flags |= PREFIX_FLAG_UPDATE;
if (RB_INSERT(prefix_tree, &peer->updates[prefix->aid], p) != NULL)
fatalx("%s: RB tree invariant violated", __func__);
+
+ return created;
}
/*
@@ -1047,15 +1166,19 @@ prefix_update(struct rib *rib, struct rde_peer *peer,
* the prefix in the RIB linked to the peer withdraw list.
*/
int
-prefix_withdraw(struct rib *rib, struct rde_peer *peer,
- struct bgpd_addr *prefix, int prefixlen)
+prefix_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix, int prefixlen)
{
struct prefix *p;
- p = prefix_get(rib, peer, prefix, prefixlen);
+ p = prefix_lookup(peer, prefix, prefixlen);
if (p == NULL) /* Got a dummy withdrawn request. */
return (0);
+ /* remove nexthop ref ... */
+ nexthop_unref(p->nexthop);
+ p->nexthop = NULL;
+ p->nhflags = 0;
+
/* unlink from aspath ...*/
path_unref(p->aspath);
p->aspath = NULL;
@@ -1063,29 +1186,181 @@ prefix_withdraw(struct rib *rib, struct rde_peer *peer,
/* ... communities ... */
communities_unref(p->communities);
p->communities = NULL;
+ /* and unlink from aspath */
+ path_unref(p->aspath);
+ p->aspath = NULL;
+ /* re already NULL */
- /* ... and nexthop but keep the re link */
- nexthop_unlink(p);
- nexthop_unref(p->nexthop);
- p->nexthop = NULL;
- p->nhflags = 0;
- /* re link still exists */
+ p->lastchange = time(NULL);
- if (p->flags) {
+ if (p->flags & PREFIX_FLAG_MASK) {
struct prefix_tree *prefix_head;
/* p is a pending update or withdraw, remove first */
prefix_head = p->flags & PREFIX_FLAG_UPDATE ?
&peer->updates[prefix->aid] :
&peer->withdraws[prefix->aid];
RB_REMOVE(prefix_tree, prefix_head, p);
- p->flags = 0;
+ p->flags &= ~PREFIX_FLAG_MASK;
}
- p->flags = PREFIX_FLAG_WITHDRAW;
+ p->flags |= PREFIX_FLAG_WITHDRAW;
if (RB_INSERT(prefix_tree, &peer->withdraws[prefix->aid], p) != NULL)
fatalx("%s: RB tree invariant violated", __func__);
return (1);
}
+static inline void
+prefix_lock(struct prefix *p)
+{
+ if (p->flags & PREFIX_FLAG_LOCKED)
+ fatalx("%s: locking locked prefix", __func__);
+ p->flags |= PREFIX_FLAG_LOCKED;
+}
+
+static inline void
+prefix_unlock(struct prefix *p)
+{
+ if ((p->flags & PREFIX_FLAG_LOCKED) == 0)
+ fatalx("%s: unlocking unlocked prefix", __func__);
+ p->flags &= ~PREFIX_FLAG_LOCKED;
+}
+
+static inline int
+prefix_is_locked(struct prefix *p)
+{
+ return (p->flags & PREFIX_FLAG_LOCKED) != 0;
+}
+
+static inline int
+prefix_is_dead(struct prefix *p)
+{
+ return (p->flags & PREFIX_FLAG_DEAD) != 0;
+}
+
+static struct prefix *
+prefix_restart(struct rib_context *ctx)
+{
+ struct prefix *p;
+
+ p = ctx->ctx_p;
+ prefix_unlock(p);
+
+ if (prefix_is_dead(p)) {
+ struct prefix *next;
+
+ next = RB_NEXT(prefix_index, unused, p);
+ prefix_adjout_destroy(p);
+ p = next;
+ }
+ return p;
+}
+
+void
+prefix_adjout_destroy(struct prefix *p)
+{
+ struct rde_peer *peer = prefix_peer(p);
+
+ if (p->eor) {
+ /* EOR marker is not linked in the index */
+ prefix_free(p);
+ return;
+ }
+
+ if (p->flags & PREFIX_FLAG_WITHDRAW)
+ RB_REMOVE(prefix_tree, &peer->withdraws[p->pt->aid], p);
+ else if (p->flags & PREFIX_FLAG_UPDATE)
+ RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p);
+ /* nothing needs to be done for PREFIX_FLAG_DEAD */
+ p->flags &= ~PREFIX_FLAG_MASK;
+
+
+ if (prefix_is_locked(p)) {
+ /* remove nexthop ref ... */
+ nexthop_unref(p->nexthop);
+ p->nexthop = NULL;
+ /* ... communities ... */
+ communities_unref(p->communities);
+ p->communities = NULL;
+ /* and unlink from aspath */
+ path_unref(p->aspath);
+ p->aspath = NULL;
+ p->nhflags = 0;
+ /* re already NULL */
+
+ /* finally mark prefix dead */
+ p->flags |= PREFIX_FLAG_DEAD;
+ return;
+ }
+
+ RB_REMOVE(prefix_index, &peer->adj_rib_out, p);
+
+ prefix_unlink(p);
+ prefix_free(p);
+}
+
+static void
+prefix_dump_r(struct rib_context *ctx)
+{
+ struct prefix *p, *next;
+ struct rde_peer *peer;
+ unsigned int i;
+
+ if ((peer = peer_get(ctx->ctx_id)) == NULL)
+ goto done;
+
+ if (ctx->ctx_p == NULL)
+ p = RB_MIN(prefix_index, &peer->adj_rib_out);
+ else
+ p = prefix_restart(ctx);
+
+ for (i = 0; p != NULL; p = next) {
+ next = RB_NEXT(prefix_index, unused, p);
+ if (prefix_is_dead(p))
+ continue;
+ if (ctx->ctx_aid != AID_UNSPEC &&
+ ctx->ctx_aid != p->pt->aid)
+ continue;
+ if (ctx->ctx_count && i++ >= ctx->ctx_count &&
+ !prefix_is_locked(p)) {
+ /* store and lock last element */
+ ctx->ctx_p = p;
+ prefix_lock(p);
+ return;
+ }
+ ctx->ctx_prefix_call(p, ctx->ctx_arg);
+ }
+
+done:
+ if (ctx->ctx_done)
+ ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
+ LIST_REMOVE(ctx, entry);
+ free(ctx);
+}
+
+int
+prefix_dump_new(struct rde_peer *peer, u_int8_t aid, unsigned int count,
+ void *arg, void (*upcall)(struct prefix *, void *),
+ void (*done)(void *, u_int8_t), int (*throttle)(void *))
+{
+ struct rib_context *ctx;
+
+ if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
+ return -1;
+ ctx->ctx_id = peer->conf.id;
+ ctx->ctx_aid = aid;
+ ctx->ctx_count = count;
+ ctx->ctx_arg = arg;
+ ctx->ctx_prefix_call = upcall;
+ ctx->ctx_done = done;
+ ctx->ctx_throttle = throttle;
+
+ LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
+
+ /* requested a sync traversal */
+ if (count == 0)
+ prefix_dump_r(ctx);
+
+ return 0;
+}
/* dump a prefix into specified buffer */
int
@@ -1205,7 +1480,7 @@ prefix_bypeer(struct rib_entry *re, struct rde_peer *peer)
{
struct prefix *p;
- LIST_FOREACH(p, &re->prefix_h, rib_l)
+ LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
if (prefix_peer(p) == peer)
return (p);
return (NULL);
@@ -1237,7 +1512,7 @@ prefix_updateall(struct prefix *p, enum nexthop_state state,
}
/* redo the route decision */
- LIST_REMOVE(p, rib_l);
+ LIST_REMOVE(p, entry.list.rib);
/*
* If the prefix is the active one remove it first,
* this has to be done because we can not detect when
@@ -1255,6 +1530,10 @@ prefix_updateall(struct prefix *p, enum nexthop_state state,
void
prefix_destroy(struct prefix *p)
{
+ /* make route decision */
+ LIST_REMOVE(p, entry.list.rib);
+ prefix_evaluate(NULL, p->re);
+
prefix_unlink(p);
prefix_free(p);
}
@@ -1290,13 +1569,6 @@ prefix_unlink(struct prefix *p)
{
struct rib_entry *re = p->re;
- if (p->eor) /* nothing to unlink for EoR markers */
- return;
-
- /* make route decision */
- LIST_REMOVE(p, rib_l);
- prefix_evaluate(NULL, re);
-
/* destroy all references to other objects */
nexthop_unlink(p);
nexthop_unref(p->nexthop);
@@ -1310,7 +1582,7 @@ prefix_unlink(struct prefix *p)
p->re = NULL;
p->pt = NULL;
- if (rib_empty(re))
+ if (re && rib_empty(re))
rib_remove(re);
/*
@@ -1319,7 +1591,7 @@ prefix_unlink(struct prefix *p)
*/
}
-/* alloc and bzero new entry. May not fail. */
+/* alloc and zero new entry. May not fail. */
static struct prefix *
prefix_alloc(void)
{
@@ -1430,7 +1702,7 @@ nexthop_runner(void)
p = nh->next_prefix;
for (j = 0; p != NULL && j < RDE_RUNNER_ROUNDS; j++) {
prefix_updateall(p, nh->state, nh->oldstate);
- p = LIST_NEXT(p, nexthop_l);
+ p = LIST_NEXT(p, entry.list.nexthop);
}
/* prep for next run, if not finished readd to tail of queue */
@@ -1540,22 +1812,21 @@ nexthop_link(struct prefix *p)
if (re_rib(p->re)->flags & F_RIB_NOEVALUATE)
return;
- LIST_INSERT_HEAD(&p->nexthop->prefix_h, p, nexthop_l);
+ p->flags |= PREFIX_NEXTHOP_LINKED;
+ LIST_INSERT_HEAD(&p->nexthop->prefix_h, p, entry.list.nexthop);
}
void
nexthop_unlink(struct prefix *p)
{
- if (p->nexthop == NULL)
- return;
-
- if (re_rib(p->re)->flags & F_RIB_NOEVALUATE)
+ if (p->nexthop == NULL || (p->flags & PREFIX_NEXTHOP_LINKED) == 0)
return;
if (p == p->nexthop->next_prefix)
- p->nexthop->next_prefix = LIST_NEXT(p, nexthop_l);
+ p->nexthop->next_prefix = LIST_NEXT(p, entry.list.nexthop);
- LIST_REMOVE(p, nexthop_l);
+ p->flags &= ~PREFIX_NEXTHOP_LINKED;
+ LIST_REMOVE(p, entry.list.nexthop);
}
struct nexthop *
diff --git a/usr.sbin/bgpd/rde_update.c b/usr.sbin/bgpd/rde_update.c
index 6a22a44d337..9ca9ca58c92 100644
--- a/usr.sbin/bgpd/rde_update.c
+++ b/usr.sbin/bgpd/rde_update.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: rde_update.c,v 1.119 2019/07/02 12:07:00 claudio Exp $ */
+/* $OpenBSD: rde_update.c,v 1.120 2019/07/17 10:13:26 claudio Exp $ */
/*
* Copyright (c) 2004 Claudio Jeker <claudio@openbsd.org>
@@ -143,8 +143,7 @@ withdraw:
/* withdraw prefix */
pt_getaddr(old->pt, &addr);
- if (prefix_withdraw(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
- old->pt->prefixlen) == 1)
+ if (prefix_withdraw(peer, &addr, old->pt->prefixlen) == 1)
peer->up_wcnt++;
} else {
switch (up_test_update(peer, new)) {
@@ -165,13 +164,11 @@ withdraw:
}
pt_getaddr(new->pt, &addr);
- if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr,
- new->pt->prefixlen, prefix_vstate(new)) != 2) {
- /* only send update if path changed */
- prefix_update(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
- new->pt->prefixlen);
+
+ /* only send update if path changed */
+ if (prefix_update(peer, &state, &addr, new->pt->prefixlen,
+ prefix_vstate(new)) == 1)
peer->up_nlricnt++;
- }
rde_filterstate_clean(&state);
}
@@ -229,11 +226,8 @@ up_generate_default(struct filter_head *rules, struct rde_peer *peer,
return;
}
- if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr, 0,
- ROA_NOTFOUND) != 2) {
- prefix_update(&ribs[RIB_ADJ_OUT].rib, peer, &addr, 0);
+ if (prefix_update(peer, &state, &addr, 0, ROA_NOTFOUND) == 1)
peer->up_nlricnt++;
- }
/* no longer needed */
rde_filterstate_clean(&state);
@@ -576,8 +570,13 @@ up_is_eor(struct rde_peer *peer, u_int8_t aid)
p = RB_MIN(prefix_tree, &peer->updates[aid]);
if (p != NULL && p->eor) {
+ /*
+ * Need to remove eor from update tree because
+ * prefix_adjout_destroy() can't handle that.
+ */
RB_REMOVE(prefix_tree, &peer->updates[aid], p);
- prefix_destroy(p);
+ p->flags &= ~PREFIX_FLAG_MASK;
+ prefix_adjout_destroy(p);
return 1;
}
return 0;
@@ -616,11 +615,11 @@ up_dump_prefix(u_char *buf, int len, struct prefix_tree *prefix_head,
/* prefix sent, remove from list and clear flag */
RB_REMOVE(prefix_tree, prefix_head, p);
- p->flags = 0;
+ p->flags &= ~PREFIX_FLAG_MASK;
if (withdraw) {
/* prefix no longer needed, remove it */
- prefix_destroy(p);
+ prefix_adjout_destroy(p);
peer->up_wcnt--;
peer->prefix_sent_withdraw++;
} else {