diff options
author | Claudio Jeker <claudio@cvs.openbsd.org> | 2019-07-17 10:13:27 +0000 |
---|---|---|
committer | Claudio Jeker <claudio@cvs.openbsd.org> | 2019-07-17 10:13:27 +0000 |
commit | 51e8db3a8349db1ac1ff9fd3f4cc1897c2a9849d (patch) | |
tree | b9b081f8a1958b0003c13969af319d4fd258fdf2 | |
parent | ef536df52e0e823c0e3d7598a9f2edcf752e43e1 (diff) |
Change the Adj-RIB-Out to a per peer set of RB trees. The way RIB data
structures are linked does not scale for the Adj-RIB-Out and so inserts
and updates into the Adj-RIB-Out did not scale because of some linear
list traversals in hot paths.
A synthetic test with 4000 peers announcing one prefix each showed that
the initial convergence time dropped from around 1 hour to around 6min.
Note: because the Adj-RIB-Out is now per peer the order in which prefixes
are dumped in 'bgpctl show rib out' changed.
Tested and OK job@, benno@, phessler@
-rw-r--r-- | usr.sbin/bgpd/mrt.c | 13 | ||||
-rw-r--r-- | usr.sbin/bgpd/parse.y | 4 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde.c | 270 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde.h | 50 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde_decide.c | 17 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde_rib.c | 419 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde_update.c | 31 |
7 files changed, 604 insertions, 200 deletions
diff --git a/usr.sbin/bgpd/mrt.c b/usr.sbin/bgpd/mrt.c index 9c6dd170b92..cf302470222 100644 --- a/usr.sbin/bgpd/mrt.c +++ b/usr.sbin/bgpd/mrt.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mrt.c,v 1.97 2019/06/25 21:33:55 benno Exp $ */ +/* $OpenBSD: mrt.c,v 1.98 2019/07/17 10:13:26 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -512,15 +512,11 @@ mrt_dump_entry_v2(struct mrt *mrt, struct rib_entry *re, u_int32_t snum) goto fail; } nump = 0; - LIST_FOREACH(p, &re->prefix_h, rib_l) { + LIST_FOREACH(p, &re->prefix_h, entry.list.rib) { struct nexthop *nexthop; struct bgpd_addr *nh; struct ibuf *tbuf; - /* skip pending withdraw in Adj-RIB-Out */ - if (prefix_aspath(p) == NULL) - continue; - nexthop = prefix_nexthop(p); if (nexthop == NULL) { bzero(&addr, sizeof(struct bgpd_addr)); @@ -683,10 +679,7 @@ mrt_dump_upcall(struct rib_entry *re, void *ptr) * dumps the table so we do the same. If only the active route should * be dumped p should be set to p = pt->active. */ - LIST_FOREACH(p, &re->prefix_h, rib_l) { - /* skip pending withdraw in Adj-RIB-Out */ - if (prefix_aspath(p) == NULL) - continue; + LIST_FOREACH(p, &re->prefix_h, entry.list.rib) { if (mrtbuf->type == MRT_TABLE_DUMP) mrt_dump_entry(mrtbuf, p, mrtbuf->seqnum++, prefix_peer(p)); diff --git a/usr.sbin/bgpd/parse.y b/usr.sbin/bgpd/parse.y index eeb041bd156..7a0ef65021c 100644 --- a/usr.sbin/bgpd/parse.y +++ b/usr.sbin/bgpd/parse.y @@ -1,4 +1,4 @@ -/* $OpenBSD: parse.y,v 1.392 2019/06/22 05:36:40 claudio Exp $ */ +/* $OpenBSD: parse.y,v 1.393 2019/07/17 10:13:26 claudio Exp $ */ /* * Copyright (c) 2002, 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -3282,8 +3282,6 @@ parse_config(char *filename, struct peer_head *ph) add_rib("Adj-RIB-In", conf->default_tableid, F_RIB_NOFIB | F_RIB_NOEVALUATE); - add_rib("Adj-RIB-Out", conf->default_tableid, - F_RIB_NOFIB | F_RIB_NOEVALUATE); add_rib("Loc-RIB", conf->default_tableid, F_RIB_LOCAL); if ((file = pushfile(filename, 1)) == NULL) diff 
--git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c index b09646f46e5..700ca7a5fe1 100644 --- a/usr.sbin/bgpd/rde.c +++ b/usr.sbin/bgpd/rde.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.c,v 1.475 2019/07/01 07:07:08 claudio Exp $ */ +/* $OpenBSD: rde.c,v 1.476 2019/07/17 10:13:26 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -94,8 +94,8 @@ u_int8_t rde_roa_validity(struct rde_prefixset *, void peer_init(u_int32_t); void peer_shutdown(void); int peer_localaddrs(struct rde_peer *, struct bgpd_addr *); +struct rde_peer *peer_match(struct ctl_neighbor *, u_int32_t); struct rde_peer *peer_add(u_int32_t, struct peer_config *); -struct rde_peer *peer_get(u_int32_t); void peer_up(u_int32_t, struct session_up *); void peer_down(u_int32_t); void peer_flush(struct rde_peer *, u_int8_t, time_t); @@ -133,7 +133,7 @@ int softreconfig; struct rde_dump_ctx { LIST_ENTRY(rde_dump_ctx) entry; struct ctl_show_rib_request req; - u_int16_t rid; + u_int32_t peerid; u_int8_t throttled; }; @@ -220,7 +220,6 @@ rde_main(int debug, int verbose) /* make sure the default RIBs are setup */ rib_new("Adj-RIB-In", 0, F_RIB_NOFIB | F_RIB_NOEVALUATE); - rib_new("Adj-RIB-Out", 0, F_RIB_NOFIB | F_RIB_NOEVALUATE); out_rules = calloc(1, sizeof(struct filter_head)); if (out_rules == NULL) @@ -2242,7 +2241,7 @@ rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags) rib.origin = asp->origin; rib.validation_state = p->validation_state; rib.flags = 0; - if (p->re->active == p) + if (p->re != NULL && p->re->active == p) rib.flags |= F_PREF_ACTIVE; if (!prefix_peer(p)->conf.ebgp) rib.flags |= F_PREF_INTERNAL; @@ -2305,7 +2304,7 @@ rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags) } static int -rde_dump_match_peer(struct rde_peer *p, struct ctl_neighbor *n) +rde_match_peer(struct rde_peer *p, struct ctl_neighbor *n) { char *s; @@ -2326,7 +2325,7 @@ rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req) { struct 
rde_aspath *asp; - if (!rde_dump_match_peer(prefix_peer(p), &req->neighbor)) + if (!rde_match_peer(prefix_peer(p), &req->neighbor)) return; asp = prefix_aspath(p); @@ -2353,10 +2352,10 @@ rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req) static void rde_dump_upcall(struct rib_entry *re, void *ptr) { - struct prefix *p; struct rde_dump_ctx *ctx = ptr; + struct prefix *p; - LIST_FOREACH(p, &re->prefix_h, rib_l) + LIST_FOREACH(p, &re->prefix_h, entry.list.rib) rde_dump_filter(p, &ctx->req); } @@ -2375,10 +2374,38 @@ rde_dump_prefix_upcall(struct rib_entry *re, void *ptr) if (ctx->req.prefixlen > pt->prefixlen) return; if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen)) - LIST_FOREACH(p, &re->prefix_h, rib_l) + LIST_FOREACH(p, &re->prefix_h, entry.list.rib) rde_dump_filter(p, &ctx->req); } +static void +rde_dump_adjout_upcall(struct prefix *p, void *ptr) +{ + struct rde_dump_ctx *ctx = ptr; + + if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD)) + return; + rde_dump_filter(p, &ctx->req); +} + +static void +rde_dump_adjout_prefix_upcall(struct prefix *p, void *ptr) +{ + struct rde_dump_ctx *ctx = ptr; + struct bgpd_addr addr; + + if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD)) + return; + + pt_getaddr(p->pt, &addr); + if (addr.aid != ctx->req.prefix.aid) + return; + if (ctx->req.prefixlen > p->pt->prefixlen) + return; + if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen)) + rde_dump_filter(p, &ctx->req); +} + static int rde_dump_throttled(void *arg) { @@ -2391,11 +2418,45 @@ static void rde_dump_done(void *arg, u_int8_t aid) { struct rde_dump_ctx *ctx = arg; + struct rde_peer *peer; + u_int error; - imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, - -1, NULL, 0); + if (ctx->req.flags & F_CTL_ADJ_OUT) { + peer = peer_match(&ctx->req.neighbor, ctx->peerid); + if (peer == NULL) + goto done; + ctx->peerid = peer->conf.id; + switch (ctx->req.type) { + case IMSG_CTL_SHOW_RIB: + if (prefix_dump_new(peer, 
ctx->req.aid, + CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall, + rde_dump_done, rde_dump_throttled) == -1) + goto nomem; + break; + case IMSG_CTL_SHOW_RIB_PREFIX: + if (prefix_dump_new(peer, ctx->req.aid, + CTL_MSG_HIGH_MARK, ctx, + rde_dump_adjout_prefix_upcall, + rde_dump_done, rde_dump_throttled) == -1) + goto nomem; + break; + default: + fatalx("%s: unsupported imsg type", __func__); + } + return; + } +done: + imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, -1, NULL, 0); LIST_REMOVE(ctx, entry); free(ctx); + return; + +nomem: + log_warn(__func__); + error = CTL_RES_NOMEM; + imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, ctx->req.pid, -1, &error, + sizeof(error)); + return; } void @@ -2404,24 +2465,92 @@ rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid, { struct rde_dump_ctx *ctx; struct rib_entry *re; + struct prefix *p; u_int error; u_int8_t hostplen; u_int16_t rid; if ((ctx = calloc(1, sizeof(*ctx))) == NULL) { nomem: - log_warn("rde_dump_ctx_new"); + log_warn(__func__); error = CTL_RES_NOMEM; imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error, sizeof(error)); return; } + + memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request)); + ctx->req.pid = pid; + ctx->req.type = type; + if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) { rid = RIB_ADJ_IN; } else if (req->flags & F_CTL_ADJ_OUT) { - rid = RIB_ADJ_OUT; + struct rde_peer *peer; + + peer = peer_match(&req->neighbor, 0); + if (peer == NULL) { + log_warnx("%s: no peer found for adj-rib-out", + __func__); + error = CTL_RES_NOSUCHPEER; + imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, + &error, sizeof(error)); + free(ctx); + return; + } + ctx->peerid = peer->conf.id; + switch (ctx->req.type) { + case IMSG_CTL_SHOW_RIB: + if (prefix_dump_new(peer, ctx->req.aid, + CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall, + rde_dump_done, rde_dump_throttled) == -1) + goto nomem; + break; + case IMSG_CTL_SHOW_RIB_PREFIX: + if (req->flags & F_LONGER) { + if (prefix_dump_new(peer, 
ctx->req.aid, + CTL_MSG_HIGH_MARK, ctx, + rde_dump_adjout_prefix_upcall, + rde_dump_done, rde_dump_throttled) == -1) + goto nomem; + break; + } + switch (req->prefix.aid) { + case AID_INET: + case AID_VPN_IPv4: + hostplen = 32; + break; + case AID_INET6: + case AID_VPN_IPv6: + hostplen = 128; + break; + default: + fatalx("%s: unknown af", __func__); + } + + do { + if (req->prefixlen == hostplen) + p = prefix_match(peer, &req->prefix); + else + p = prefix_lookup(peer, &req->prefix, + req->prefixlen); + if (p) + rde_dump_adjout_upcall(p, ctx); + } while ((peer = peer_match(&req->neighbor, + peer->conf.id))); + + imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, + -1, NULL, 0); + free(ctx); + return; + default: + fatalx("%s: unsupported imsg type", __func__); + } + + LIST_INSERT_HEAD(&rde_dump_h, ctx, entry); + return; } else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) { - log_warnx("rde_dump_ctx_new: no such rib %s", req->rib); + log_warnx("%s: no such rib %s", __func__, req->rib); error = CTL_RES_NOSUCHRIB; imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error, sizeof(error)); @@ -2429,10 +2558,6 @@ rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid, return; } - memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request)); - ctx->req.pid = pid; - ctx->req.type = type; - ctx->rid = rid; switch (ctx->req.type) { case IMSG_CTL_SHOW_NETWORK: if (rib_dump_new(rid, ctx->req.aid, CTL_MSG_HIGH_MARK, ctx, @@ -2463,10 +2588,10 @@ rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid, hostplen = 128; break; default: - fatalx("rde_dump_ctx_new: unknown af"); + fatalx("%s: unknown af", __func__); } if (req->prefixlen == hostplen) - re = rib_lookup(rib_byid(rid), &req->prefix); + re = rib_match(rib_byid(rid), &req->prefix); else re = rib_get(rib_byid(rid), &req->prefix, req->prefixlen); @@ -2502,21 +2627,7 @@ rde_dump_ctx_terminate(pid_t pid) LIST_FOREACH(ctx, &rde_dump_h, entry) { if (ctx->req.pid == pid) { - void (*upcall)(struct rib_entry *, 
void *); - switch (ctx->req.type) { - case IMSG_CTL_SHOW_NETWORK: - upcall = network_dump_upcall; - break; - case IMSG_CTL_SHOW_RIB: - upcall = rde_dump_upcall; - break; - case IMSG_CTL_SHOW_RIB_PREFIX: - upcall = rde_dump_prefix_upcall; - break; - default: - fatalx("%s: unsupported imsg type", __func__); - } - rib_dump_terminate(ctx->rid, ctx, upcall); + rib_dump_terminate(ctx); return; } } @@ -2697,16 +2808,9 @@ rde_up_dump_upcall(struct rib_entry *re, void *ptr) } static void -rde_up_flush_upcall(struct rib_entry *re, void *ptr) +rde_up_flush_upcall(struct prefix *p, void *ptr) { - struct rde_peer *peer = ptr; - struct prefix *p, *np; - - LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) { - if (peer != prefix_peer(p)) - continue; - up_generate_updates(out_rules, peer, NULL, p); - } + up_generate_updates(out_rules, prefix_peer(p), NULL, p); } static void @@ -3011,18 +3115,16 @@ rde_reload_done(void) peer->reconf_out = 0; peer->reconf_rib = 0; if (peer->loc_rib_id != rib_find(peer->conf.rib)) { - char *p = log_fmt_peer(&peer->conf); - log_debug("rib change: reloading peer %s", p); - free(p); + log_peer_info(&peer->conf, "rib change, reloading"); peer->loc_rib_id = rib_find(peer->conf.rib); if (peer->loc_rib_id == RIB_NOTFOUND) fatalx("King Bula's peer met an unknown RIB"); peer->reconf_rib = 1; softreconfig++; - if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC, - RDE_RUNNER_ROUNDS, peer, rde_up_flush_upcall, + if (prefix_dump_new(peer, AID_UNSPEC, + RDE_RUNNER_ROUNDS, NULL, rde_up_flush_upcall, rde_softreconfig_in_done, NULL) == -1) - fatal("%s: rib_dump_new", __func__); + fatal("%s: prefix_dump_new", __func__); log_peer_info(&peer->conf, "flushing Adj-RIB-Out"); continue; } @@ -3197,7 +3299,7 @@ rde_softreconfig_in(struct rib_entry *re, void *bula) pt = re->prefix; pt_getaddr(pt, &prefix); - LIST_FOREACH(p, &re->prefix_h, rib_l) { + LIST_FOREACH(p, &re->prefix_h, entry.list.rib) { asp = prefix_aspath(p); peer = prefix_peer(p); force_eval = 0; @@ -3358,6 +3460,35 @@ 
peer_get(u_int32_t id) } struct rde_peer * +peer_match(struct ctl_neighbor *n, u_int32_t peerid) +{ + struct rde_peer_head *head; + struct rde_peer *peer; + u_int32_t i = 0; + + if (peerid != 0) + i = peerid & peertable.peer_hashmask; + + while (i <= peertable.peer_hashmask) { + head = &peertable.peer_hashtbl[i]; + LIST_FOREACH(peer, head, hash_l) { + /* skip peers until peerid is found */ + if (peerid == peer->conf.id) { + peerid = 0; + continue; + } + if (peerid != 0) + continue; + + if (rde_match_peer(peer, n)) + return (peer); + } + i++; + } + return (NULL); +} + +struct rde_peer * peer_add(u_int32_t id, struct peer_config *p_conf) { struct rde_peer_head *head; @@ -3441,17 +3572,9 @@ peer_localaddrs(struct rde_peer *peer, struct bgpd_addr *laddr) } static void -peer_adjout_flush_upcall(struct rib_entry *re, void *arg) +peer_adjout_clear_upcall(struct prefix *p, void *arg) { - struct rde_peer *peer = arg; - struct prefix *p, *np; - - LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) { - if (peer != prefix_peer(p)) - continue; - prefix_destroy(p); - break; /* optimization, only one match per peer possible */ - } + prefix_adjout_destroy(p); } void @@ -3472,9 +3595,9 @@ peer_up(u_int32_t id, struct session_up *sup) * There is a race condition when doing PEER_ERR -> PEER_DOWN. * So just do a full reset of the peer here. 
*/ - if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC, 0, peer, - peer_adjout_flush_upcall, NULL, NULL) == -1) - fatal("%s: rib_dump_new", __func__); + if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL, + peer_adjout_clear_upcall, NULL, NULL) == -1) + fatal("%s: prefix_dump_new", __func__); peer_flush(peer, AID_UNSPEC, 0); peer->prefix_cnt = 0; peer->state = PEER_DOWN; @@ -3519,12 +3642,12 @@ peer_down(u_int32_t id) peer->remote_bgpid = 0; peer->state = PEER_DOWN; /* stop all pending dumps which may depend on this peer */ - rib_dump_terminate(peer->loc_rib_id, peer, rde_up_dump_upcall); + rib_dump_terminate(peer); /* flush Adj-RIB-Out for this peer */ - if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC, 0, peer, - peer_adjout_flush_upcall, NULL, NULL) == -1) - fatal("%s: rib_dump_new", __func__); + if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL, + peer_adjout_clear_upcall, NULL, NULL) == -1) + fatal("%s: prefix_dump_new", __func__); peer_flush(peer, AID_UNSPEC, 0); @@ -3553,7 +3676,7 @@ peer_flush_upcall(struct rib_entry *re, void *arg) pt_getaddr(re->prefix, &addr); prefixlen = re->prefix->prefixlen; - LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) { + LIST_FOREACH_SAFE(p, &re->prefix_h, entry.list.rib, np) { if (peer != prefix_peer(p)) continue; if (staletime && p->lastchange > staletime) @@ -3888,7 +4011,7 @@ network_dump_upcall(struct rib_entry *re, void *ptr) struct bgpd_addr addr; struct rde_dump_ctx *ctx = ptr; - LIST_FOREACH(p, &re->prefix_h, rib_l) { + LIST_FOREACH(p, &re->prefix_h, entry.list.rib) { asp = prefix_aspath(p); if (!(asp->flags & F_PREFIX_ANNOUNCED)) continue; @@ -3925,7 +4048,7 @@ network_flush_upcall(struct rib_entry *re, void *ptr) pt_getaddr(re->prefix, &addr); prefixlen = re->prefix->prefixlen; - LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) { + LIST_FOREACH_SAFE(p, &re->prefix_h, entry.list.rib, np) { if (prefix_peer(p) != peer) continue; asp = prefix_aspath(p); @@ -3966,9 +4089,6 @@ rde_shutdown(void) while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != 
NULL) peer_down(p->conf.id); - /* then since decision process is off, kill RIB_ADJ_OUT */ - rib_free(rib_byid(RIB_ADJ_OUT)); - /* free filters */ filterlist_free(out_rules); filterlist_free(out_rules_tmp); diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h index 9d0f4978a17..a0ff7842b53 100644 --- a/usr.sbin/bgpd/rde.h +++ b/usr.sbin/bgpd/rde.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.h,v 1.219 2019/07/01 07:07:08 claudio Exp $ */ +/* $OpenBSD: rde.h,v 1.220 2019/07/17 10:13:26 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> and @@ -57,8 +57,7 @@ struct rib { }; #define RIB_ADJ_IN 0 -#define RIB_ADJ_OUT 1 -#define RIB_LOC_START 2 +#define RIB_LOC_START 1 #define RIB_NOTFOUND 0xffff struct rib_desc { @@ -78,6 +77,7 @@ LIST_HEAD(aspath_list, aspath); LIST_HEAD(attr_list, attr); LIST_HEAD(aspath_head, rde_aspath); RB_HEAD(prefix_tree, prefix); +RB_HEAD(prefix_index, prefix); struct rde_peer { LIST_ENTRY(rde_peer) hash_l; /* hash list over all peers */ @@ -87,6 +87,7 @@ struct rde_peer { struct bgpd_addr local_v4_addr; struct bgpd_addr local_v6_addr; struct capabilities capa; + struct prefix_index adj_rib_out; struct prefix_tree updates[AID_MAX]; struct prefix_tree withdraws[AID_MAX]; time_t staletime[AID_MAX]; @@ -306,8 +307,14 @@ struct pt_entry_vpn6 { }; struct prefix { - LIST_ENTRY(prefix) rib_l, nexthop_l; - RB_ENTRY(prefix) entry; + union { + struct { + LIST_ENTRY(prefix) rib, nexthop; + } list; + struct { + RB_ENTRY(prefix) index, update; + } tree; + } entry; struct pt_entry *pt; struct rib_entry *re; struct rde_aspath *aspath; @@ -317,12 +324,17 @@ struct prefix { time_t lastchange; u_int8_t validation_state; u_int8_t nhflags; - u_int8_t flags; u_int8_t eor; -#define PREFIX_FLAG_WITHDRAW 0x01 -#define PREFIX_FLAG_UPDATE 0x02 + u_int8_t flags; +#define PREFIX_FLAG_WITHDRAW 0x01 /* queued for withdraw */ +#define PREFIX_FLAG_UPDATE 0x02 /* queued for update */ +#define PREFIX_FLAG_DEAD 0x04 /* locked but removed */ +#define 
PREFIX_FLAG_MASK 0x07 /* mask for the three prefix types */ +#define PREFIX_NEXTHOP_LINKED 0x40 /* prefix is linked onto nexthop list */ +#define PREFIX_FLAG_LOCKED 0x80 /* locked by rib walker */ }; +/* possible states for nhflags */ #define NEXTHOP_SELF 0x01 #define NEXTHOP_REJECT 0x02 #define NEXTHOP_BLACKHOLE 0x04 @@ -356,6 +368,7 @@ u_int32_t rde_local_as(void); int rde_noevaluate(void); int rde_decisionflags(void); int rde_as4byte(struct rde_peer *); +struct rde_peer *peer_get(u_int32_t); /* rde_attr.c */ int attr_write(void *, u_int16_t, u_int8_t, u_int8_t, void *, @@ -395,6 +408,7 @@ u_char *aspath_override(struct aspath *, u_int32_t, u_int32_t, u_int16_t *); int aspath_lenmatch(struct aspath *, enum aslen_spec, u_int); +/* rde_community.c */ int community_match(struct rde_community *, struct community *, struct rde_peer *); int community_set(struct rde_community *, struct community *, @@ -499,15 +513,14 @@ struct rib_desc *rib_desc(struct rib *); void rib_free(struct rib *); void rib_shutdown(void); struct rib_entry *rib_get(struct rib *, struct bgpd_addr *, int); -struct rib_entry *rib_lookup(struct rib *, struct bgpd_addr *); +struct rib_entry *rib_match(struct rib *, struct bgpd_addr *); int rib_dump_pending(void); void rib_dump_runner(void); int rib_dump_new(u_int16_t, u_int8_t, unsigned int, void *, void (*)(struct rib_entry *, void *), void (*)(void *, u_int8_t), int (*)(void *)); -void rib_dump_terminate(u_int16_t, void *, - void (*)(struct rib_entry *, void *)); +void rib_dump_terminate(void *); static inline struct rib * re_rib(struct rib_entry *re) @@ -540,13 +553,20 @@ void path_put(struct rde_aspath *); #define PREFIX_SIZE(x) (((x) + 7) / 8 + 1) struct prefix *prefix_get(struct rib *, struct rde_peer *, struct bgpd_addr *, int); +struct prefix *prefix_lookup(struct rde_peer *, struct bgpd_addr *, int); +struct prefix *prefix_match(struct rde_peer *, struct bgpd_addr *); int prefix_remove(struct rib *, struct rde_peer *, struct bgpd_addr *, 
int); void prefix_add_eor(struct rde_peer *, u_int8_t); -void prefix_update(struct rib *, struct rde_peer *, - struct bgpd_addr *, int); -int prefix_withdraw(struct rib *, struct rde_peer *, - struct bgpd_addr *, int); +int prefix_update(struct rde_peer *, struct filterstate *, + struct bgpd_addr *, int, u_int8_t); +int prefix_withdraw(struct rde_peer *, struct bgpd_addr *, int); +void prefix_adjout_destroy(struct prefix *p); +void prefix_adjout_dump(struct rde_peer *, void *, + void (*)(struct prefix *, void *)); +int prefix_dump_new(struct rde_peer *, u_int8_t, unsigned int, + void *, void (*)(struct prefix *, void *), + void (*)(void *, u_int8_t), int (*)(void *)); int prefix_write(u_char *, int, struct bgpd_addr *, u_int8_t, int); int prefix_writebuf(struct ibuf *, struct bgpd_addr *, u_int8_t); struct prefix *prefix_bypeer(struct rib_entry *, struct rde_peer *); diff --git a/usr.sbin/bgpd/rde_decide.c b/usr.sbin/bgpd/rde_decide.c index 47110cf1880..e2d2327b1a6 100644 --- a/usr.sbin/bgpd/rde_decide.c +++ b/usr.sbin/bgpd/rde_decide.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_decide.c,v 1.74 2019/01/21 02:07:56 claudio Exp $ */ +/* $OpenBSD: rde_decide.c,v 1.75 2019/07/17 10:13:26 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -245,7 +245,7 @@ prefix_evaluate(struct prefix *p, struct rib_entry *re) if (re_rib(re)->flags & F_RIB_NOEVALUATE || rde_noevaluate()) { /* decision process is turned off */ if (p != NULL) - LIST_INSERT_HEAD(&re->prefix_h, p, rib_l); + LIST_INSERT_HEAD(&re->prefix_h, p, entry.list.rib); if (re->active != NULL) re->active = NULL; return; @@ -253,15 +253,18 @@ prefix_evaluate(struct prefix *p, struct rib_entry *re) if (p != NULL) { if (LIST_EMPTY(&re->prefix_h)) - LIST_INSERT_HEAD(&re->prefix_h, p, rib_l); + LIST_INSERT_HEAD(&re->prefix_h, p, entry.list.rib); else { - LIST_FOREACH(xp, &re->prefix_h, rib_l) { + LIST_FOREACH(xp, &re->prefix_h, entry.list.rib) { if (prefix_cmp(p, xp) > 0) { - LIST_INSERT_BEFORE(xp, 
p, rib_l); + LIST_INSERT_BEFORE(xp, p, + entry.list.rib); break; - } else if (LIST_NEXT(xp, rib_l) == NULL) { + } else if (LIST_NEXT(xp, entry.list.rib) == + NULL) { /* if xp last element ... */ - LIST_INSERT_AFTER(xp, p, rib_l); + LIST_INSERT_AFTER(xp, p, + entry.list.rib); break; } } diff --git a/usr.sbin/bgpd/rde_rib.c b/usr.sbin/bgpd/rde_rib.c index 5e3e3221746..3b57f2d3fd4 100644 --- a/usr.sbin/bgpd/rde_rib.c +++ b/usr.sbin/bgpd/rde_rib.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_rib.c,v 1.198 2019/07/01 14:47:56 claudio Exp $ */ +/* $OpenBSD: rde_rib.c,v 1.199 2019/07/17 10:13:26 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -52,8 +52,10 @@ RB_GENERATE(rib_tree, rib_entry, rib_e, rib_compare); struct rib_context { LIST_ENTRY(rib_context) entry; struct rib_entry *ctx_re; - u_int16_t ctx_rib_id; - void (*ctx_upcall)(struct rib_entry *, void *); + struct prefix *ctx_p; + u_int32_t ctx_id; + void (*ctx_rib_call)(struct rib_entry *, void *); + void (*ctx_prefix_call)(struct prefix *, void *); void (*ctx_done)(void *, u_int8_t); int (*ctx_throttle)(void *); void *ctx_arg; @@ -69,6 +71,7 @@ static int prefix_add(struct bgpd_addr *, int, struct rib *, static int prefix_move(struct prefix *, struct rde_peer *, struct rde_aspath *, struct rde_community *, struct nexthop *, u_int8_t, u_int8_t); +static void prefix_dump_r(struct rib_context *); static inline struct rib_entry * re_lock(struct rib_entry *re) @@ -128,7 +131,7 @@ rib_new(char *name, u_int rtableid, u_int16_t flags) rib_size = id + 1; } - bzero(&ribs[id], sizeof(struct rib_desc)); + memset(&ribs[id], 0, sizeof(struct rib_desc)); strlcpy(ribs[id].name, name, sizeof(ribs[id].name)); RB_INIT(rib_tree(&ribs[id].rib)); ribs[id].state = RECONF_REINIT; @@ -196,7 +199,7 @@ rib_free(struct rib *rib) */ while ((p = LIST_FIRST(&re->prefix_h))) { struct rde_aspath *asp = prefix_aspath(p); - np = LIST_NEXT(p, rib_l); + np = LIST_NEXT(p, entry.list.rib); if (asp && asp->pftableid) { struct 
bgpd_addr addr; @@ -215,7 +218,7 @@ rib_free(struct rib *rib) rd = &ribs[rib->id]; filterlist_free(rd->in_rules_tmp); filterlist_free(rd->in_rules); - bzero(rd, sizeof(struct rib_desc)); + memset(rd, 0, sizeof(struct rib_desc)); } void @@ -235,7 +238,7 @@ rib_shutdown(void) struct rib_desc *rd = &ribs[id]; filterlist_free(rd->in_rules_tmp); filterlist_free(rd->in_rules); - bzero(rd, sizeof(struct rib_desc)); + memset(rd, 0, sizeof(struct rib_desc)); } free(ribs); } @@ -247,7 +250,7 @@ rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen) struct pt_entry *pte; pte = pt_fill(prefix, prefixlen); - bzero(&xre, sizeof(xre)); + memset(&xre, 0, sizeof(xre)); xre.prefix = pte; re = RB_FIND(rib_tree, rib_tree(rib), &xre); @@ -258,7 +261,7 @@ rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen) } struct rib_entry * -rib_lookup(struct rib *rib, struct bgpd_addr *addr) +rib_match(struct rib *rib, struct bgpd_addr *addr) { struct rib_entry *re; int i; @@ -281,7 +284,7 @@ rib_lookup(struct rib *rib, struct bgpd_addr *addr) } break; default: - fatalx("rib_lookup: unknown af"); + fatalx("%s: unknown af", __func__); } return (NULL); } @@ -367,9 +370,9 @@ rib_dump_r(struct rib_context *ctx) struct rib *rib; unsigned int i; - rib = rib_byid(ctx->ctx_rib_id); + rib = rib_byid(ctx->ctx_id); if (rib == NULL) - fatalx("%s: rib id %u gone", __func__, ctx->ctx_rib_id); + fatalx("%s: rib id %u gone", __func__, ctx->ctx_id); if (ctx->ctx_re == NULL) re = RB_MIN(rib_tree, rib_tree(rib)); @@ -378,9 +381,9 @@ rib_dump_r(struct rib_context *ctx) for (i = 0; re != NULL; re = next) { next = RB_NEXT(rib_tree, unused, re); - if (re->rib_id != ctx->ctx_rib_id) + if (re->rib_id != ctx->ctx_id) fatalx("%s: Unexpected RIB %u != %u.", __func__, - re->rib_id, ctx->ctx_rib_id); + re->rib_id, ctx->ctx_id); if (ctx->ctx_aid != AID_UNSPEC && ctx->ctx_aid != re->prefix->aid) continue; @@ -391,7 +394,7 @@ rib_dump_r(struct rib_context *ctx) re_lock(re); return; } - ctx->ctx_upcall(re, 
ctx->ctx_arg); + ctx->ctx_rib_call(re, ctx->ctx_arg); } if (ctx->ctx_done) @@ -422,7 +425,10 @@ rib_dump_runner(void) LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) { if (ctx->ctx_throttle && ctx->ctx_throttle(ctx->ctx_arg)) continue; - rib_dump_r(ctx); + if (ctx->ctx_rib_call != NULL) + rib_dump_r(ctx); + else + prefix_dump_r(ctx); } } @@ -432,7 +438,7 @@ rib_dump_abort(u_int16_t id) struct rib_context *ctx, *next; LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) { - if (id != ctx->ctx_rib_id) + if (id != ctx->ctx_id) continue; if (ctx->ctx_done) ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid); @@ -450,11 +456,11 @@ rib_dump_new(u_int16_t id, u_int8_t aid, unsigned int count, void *arg, if ((ctx = calloc(1, sizeof(*ctx))) == NULL) return -1; - ctx->ctx_rib_id = id; + ctx->ctx_id = id; ctx->ctx_aid = aid; ctx->ctx_count = count; ctx->ctx_arg = arg; - ctx->ctx_upcall = upcall; + ctx->ctx_rib_call = upcall; ctx->ctx_done = done; ctx->ctx_throttle = throttle; @@ -468,14 +474,12 @@ rib_dump_new(u_int16_t id, u_int8_t aid, unsigned int count, void *arg, } void -rib_dump_terminate(u_int16_t id, void *arg, - void (*upcall)(struct rib_entry *, void *)) +rib_dump_terminate(void *arg) { struct rib_context *ctx, *next; LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) { - if (id != ctx->ctx_rib_id || ctx->ctx_arg != arg || - ctx->ctx_upcall != upcall) + if (ctx->ctx_arg != arg) continue; if (ctx->ctx_done) ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid); @@ -610,15 +614,6 @@ path_update(struct rib *rib, struct rde_peer *peer, struct filterstate *state, p->validation_state = vstate; return (2); } - if (p->flags) { - struct prefix_tree *prefix_head; - /* prefix is a pending update */ - prefix_head = p->flags & PREFIX_FLAG_UPDATE ? 
- &peer->updates[prefix->aid] : - &peer->withdraws[prefix->aid]; - RB_REMOVE(prefix_tree, prefix_head, p); - p->flags = 0; - } } /* @@ -881,7 +876,14 @@ prefix_cmp(struct prefix *a, struct prefix *b) return pt_prefix_cmp(a->pt, b->pt); } -RB_GENERATE(prefix_tree, prefix, entry, prefix_cmp) +static inline int +prefix_index_cmp(struct prefix *a, struct prefix *b) +{ + return pt_prefix_cmp(a->pt, b->pt); +} + +RB_GENERATE(prefix_tree, prefix, entry.tree.update, prefix_cmp) +RB_GENERATE_STATIC(prefix_index, prefix, entry.tree.index, prefix_index_cmp) /* * search for specified prefix of a peer. Returns NULL if not found. @@ -899,6 +901,52 @@ prefix_get(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix, } /* + * lookup prefix in the peer prefix_index. Returns NULL if not found. + */ +struct prefix * +prefix_lookup(struct rde_peer *peer, struct bgpd_addr *prefix, + int prefixlen) +{ + struct prefix xp; + struct pt_entry *pte; + + memset(&xp, 0, sizeof(xp)); + pte = pt_fill(prefix, prefixlen); + xp.pt = pte; + + return RB_FIND(prefix_index, &peer->adj_rib_out, &xp); +} + +struct prefix * +prefix_match(struct rde_peer *peer, struct bgpd_addr *addr) +{ + struct prefix *p; + int i; + + switch (addr->aid) { + case AID_INET: + case AID_VPN_IPv4: + for (i = 32; i >= 0; i--) { + p = prefix_lookup(peer, addr, i); + if (p != NULL) + return p; + } + break; + case AID_INET6: + case AID_VPN_IPv6: + for (i = 128; i >= 0; i--) { + p = prefix_lookup(peer, addr, i); + if (p != NULL) + return p; + } + break; + default: + fatalx("%s: unknown af", __func__); + } + return NULL; +} + +/* * Adds or updates a prefix. 
*/ static int @@ -936,8 +984,8 @@ prefix_move(struct prefix *p, struct rde_peer *peer, np->aspath = path_ref(asp); np->communities = communities_ref(comm); np->peer = peer; - np->pt = p->pt; /* skip refcnt update since ref is moved */ np->re = p->re; + np->pt = p->pt; /* skip refcnt update since ref is moved */ np->validation_state = vstate; np->nhflags = nhflags; np->nexthop = nexthop_ref(nexthop); @@ -957,7 +1005,7 @@ prefix_move(struct prefix *p, struct rde_peer *peer, * This is safe because we create a new prefix and so the change * is noticed by prefix_evaluate(). */ - LIST_REMOVE(p, rib_l); + LIST_REMOVE(p, entry.list.rib); prefix_evaluate(np, np->re); /* remove old prefix node */ @@ -1020,26 +1068,97 @@ prefix_add_eor(struct rde_peer *peer, u_int8_t aid) if (RB_INSERT(prefix_tree, &peer->updates[aid], p) != NULL) /* no need to add if EoR marker already present */ prefix_free(p); + /* EOR marker is not inserted into the adj_rib_out index */ } /* * Put a prefix from the Adj-RIB-Out onto the update queue. 
*/ -void -prefix_update(struct rib *rib, struct rde_peer *peer, - struct bgpd_addr *prefix, int prefixlen) +int +prefix_update(struct rde_peer *peer, struct filterstate *state, + struct bgpd_addr *prefix, int prefixlen, u_int8_t vstate) { + struct prefix_tree *prefix_head = NULL; + struct rde_aspath *asp; + struct rde_community *comm; struct prefix *p; + int created = 0; + + if ((p = prefix_lookup(peer, prefix, prefixlen)) != NULL) { + /* prefix is already in the Adj-RIB-Out */ + if (p->flags & PREFIX_FLAG_WITHDRAW) { + created = 1; /* consider this a new entry */ + peer->up_wcnt--; + prefix_head = &peer->withdraws[prefix->aid]; + RB_REMOVE(prefix_tree, prefix_head, p); + } else if (p->flags & PREFIX_FLAG_DEAD) { + created = 1; /* consider this a new entry */ + } else { + if (prefix_nhflags(p) == state->nhflags && + prefix_nexthop(p) == state->nexthop && + communities_equal(&state->communities, + prefix_communities(p)) && + path_compare(&state->aspath, prefix_aspath(p)) == + 0) { + /* nothing changed */ + p->validation_state = vstate; + p->lastchange = time(NULL); + return 0; + } - p = prefix_get(rib, peer, prefix, prefixlen); - if (p == NULL) /* Got a dummy withdrawn request. 
*/ - return; + if (p->flags & PREFIX_FLAG_UPDATE) { + /* created = 0 so up_nlricnt is not increased */ + prefix_head = &peer->updates[prefix->aid]; + RB_REMOVE(prefix_tree, prefix_head, p); + } + } + /* unlink from aspath and remove nexthop ref */ + nexthop_unref(p->nexthop); + communities_unref(p->communities); + path_unref(p->aspath); + p->flags &= ~PREFIX_FLAG_MASK; + + /* peer and pt remain */ + } else { + p = prefix_alloc(); + created = 1; + + p->pt = pt_get(prefix, prefixlen); + if (p->pt == NULL) + fatalx("%s: update for non existing prefix", __func__); + pt_ref(p->pt); + p->peer = peer; + + if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL) + fatalx("%s: RB index invariant violated", __func__); + } - if (p->flags != 0) + if ((asp = path_lookup(&state->aspath)) == NULL) { + /* Path not available, create and link a new one. */ + asp = path_copy(path_get(), &state->aspath); + path_link(asp); + } + + if ((comm = communities_lookup(&state->communities)) == NULL) { + /* Communities not available, create and link a new one. */ + comm = communities_link(&state->communities); + } + + p->aspath = path_ref(asp); + p->communities = communities_ref(comm); + p->nexthop = nexthop_ref(state->nexthop); + p->nhflags = state->nhflags; + + p->validation_state = vstate; + p->lastchange = time(NULL); + + if (p->flags & PREFIX_FLAG_MASK) fatalx("%s: bad flags %x", __func__, p->flags); - p->flags = PREFIX_FLAG_UPDATE; + p->flags |= PREFIX_FLAG_UPDATE; if (RB_INSERT(prefix_tree, &peer->updates[prefix->aid], p) != NULL) fatalx("%s: RB tree invariant violated", __func__); + + return created; } /* @@ -1047,15 +1166,19 @@ prefix_update(struct rib *rib, struct rde_peer *peer, * the prefix in the RIB linked to the peer withdraw list. 
*/ int -prefix_withdraw(struct rib *rib, struct rde_peer *peer, - struct bgpd_addr *prefix, int prefixlen) +prefix_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix, int prefixlen) { struct prefix *p; - p = prefix_get(rib, peer, prefix, prefixlen); + p = prefix_lookup(peer, prefix, prefixlen); if (p == NULL) /* Got a dummy withdrawn request. */ return (0); + /* remove nexthop ref ... */ + nexthop_unref(p->nexthop); + p->nexthop = NULL; + p->nhflags = 0; + /* unlink from aspath ...*/ path_unref(p->aspath); p->aspath = NULL; @@ -1063,29 +1186,181 @@ prefix_withdraw(struct rib *rib, struct rde_peer *peer, /* ... communities ... */ communities_unref(p->communities); p->communities = NULL; + /* and unlink from aspath */ + path_unref(p->aspath); + p->aspath = NULL; + /* re already NULL */ - /* ... and nexthop but keep the re link */ - nexthop_unlink(p); - nexthop_unref(p->nexthop); - p->nexthop = NULL; - p->nhflags = 0; - /* re link still exists */ + p->lastchange = time(NULL); - if (p->flags) { + if (p->flags & PREFIX_FLAG_MASK) { struct prefix_tree *prefix_head; /* p is a pending update or withdraw, remove first */ prefix_head = p->flags & PREFIX_FLAG_UPDATE ? 
&peer->updates[prefix->aid] : &peer->withdraws[prefix->aid]; RB_REMOVE(prefix_tree, prefix_head, p); - p->flags = 0; + p->flags &= ~PREFIX_FLAG_MASK; } - p->flags = PREFIX_FLAG_WITHDRAW; + p->flags |= PREFIX_FLAG_WITHDRAW; if (RB_INSERT(prefix_tree, &peer->withdraws[prefix->aid], p) != NULL) fatalx("%s: RB tree invariant violated", __func__); return (1); } +static inline void +prefix_lock(struct prefix *p) +{ + if (p->flags & PREFIX_FLAG_LOCKED) + fatalx("%s: locking locked prefix", __func__); + p->flags |= PREFIX_FLAG_LOCKED; +} + +static inline void +prefix_unlock(struct prefix *p) +{ + if ((p->flags & PREFIX_FLAG_LOCKED) == 0) + fatalx("%s: unlocking unlocked prefix", __func__); + p->flags &= ~PREFIX_FLAG_LOCKED; +} + +static inline int +prefix_is_locked(struct prefix *p) +{ + return (p->flags & PREFIX_FLAG_LOCKED) != 0; +} + +static inline int +prefix_is_dead(struct prefix *p) +{ + return (p->flags & PREFIX_FLAG_DEAD) != 0; +} + +static struct prefix * +prefix_restart(struct rib_context *ctx) +{ + struct prefix *p; + + p = ctx->ctx_p; + prefix_unlock(p); + + if (prefix_is_dead(p)) { + struct prefix *next; + + next = RB_NEXT(prefix_index, unused, p); + prefix_adjout_destroy(p); + p = next; + } + return p; +} + +void +prefix_adjout_destroy(struct prefix *p) +{ + struct rde_peer *peer = prefix_peer(p); + + if (p->eor) { + /* EOR marker is not linked in the index */ + prefix_free(p); + return; + } + + if (p->flags & PREFIX_FLAG_WITHDRAW) + RB_REMOVE(prefix_tree, &peer->withdraws[p->pt->aid], p); + else if (p->flags & PREFIX_FLAG_UPDATE) + RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p); + /* nothing needs to be done for PREFIX_FLAG_DEAD */ + p->flags &= ~PREFIX_FLAG_MASK; + + + if (prefix_is_locked(p)) { + /* remove nexthop ref ... */ + nexthop_unref(p->nexthop); + p->nexthop = NULL; + /* ... communities ... 
*/ + communities_unref(p->communities); + p->communities = NULL; + /* and unlink from aspath */ + path_unref(p->aspath); + p->aspath = NULL; + p->nhflags = 0; + /* re already NULL */ + + /* finally mark prefix dead */ + p->flags |= PREFIX_FLAG_DEAD; + return; + } + + RB_REMOVE(prefix_index, &peer->adj_rib_out, p); + + prefix_unlink(p); + prefix_free(p); +} + +static void +prefix_dump_r(struct rib_context *ctx) +{ + struct prefix *p, *next; + struct rde_peer *peer; + unsigned int i; + + if ((peer = peer_get(ctx->ctx_id)) == NULL) + goto done; + + if (ctx->ctx_p == NULL) + p = RB_MIN(prefix_index, &peer->adj_rib_out); + else + p = prefix_restart(ctx); + + for (i = 0; p != NULL; p = next) { + next = RB_NEXT(prefix_index, unused, p); + if (prefix_is_dead(p)) + continue; + if (ctx->ctx_aid != AID_UNSPEC && + ctx->ctx_aid != p->pt->aid) + continue; + if (ctx->ctx_count && i++ >= ctx->ctx_count && + !prefix_is_locked(p)) { + /* store and lock last element */ + ctx->ctx_p = p; + prefix_lock(p); + return; + } + ctx->ctx_prefix_call(p, ctx->ctx_arg); + } + +done: + if (ctx->ctx_done) + ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid); + LIST_REMOVE(ctx, entry); + free(ctx); +} + +int +prefix_dump_new(struct rde_peer *peer, u_int8_t aid, unsigned int count, + void *arg, void (*upcall)(struct prefix *, void *), + void (*done)(void *, u_int8_t), int (*throttle)(void *)) +{ + struct rib_context *ctx; + + if ((ctx = calloc(1, sizeof(*ctx))) == NULL) + return -1; + ctx->ctx_id = peer->conf.id; + ctx->ctx_aid = aid; + ctx->ctx_count = count; + ctx->ctx_arg = arg; + ctx->ctx_prefix_call = upcall; + ctx->ctx_done = done; + ctx->ctx_throttle = throttle; + + LIST_INSERT_HEAD(&rib_dumps, ctx, entry); + + /* requested a sync traversal */ + if (count == 0) + prefix_dump_r(ctx); + + return 0; +} /* dump a prefix into specified buffer */ int @@ -1205,7 +1480,7 @@ prefix_bypeer(struct rib_entry *re, struct rde_peer *peer) { struct prefix *p; - LIST_FOREACH(p, &re->prefix_h, rib_l) + LIST_FOREACH(p, 
&re->prefix_h, entry.list.rib) if (prefix_peer(p) == peer) return (p); return (NULL); @@ -1237,7 +1512,7 @@ prefix_updateall(struct prefix *p, enum nexthop_state state, } /* redo the route decision */ - LIST_REMOVE(p, rib_l); + LIST_REMOVE(p, entry.list.rib); /* * If the prefix is the active one remove it first, * this has to be done because we can not detect when @@ -1255,6 +1530,10 @@ prefix_updateall(struct prefix *p, enum nexthop_state state, void prefix_destroy(struct prefix *p) { + /* make route decision */ + LIST_REMOVE(p, entry.list.rib); + prefix_evaluate(NULL, p->re); + prefix_unlink(p); prefix_free(p); } @@ -1290,13 +1569,6 @@ prefix_unlink(struct prefix *p) { struct rib_entry *re = p->re; - if (p->eor) /* nothing to unlink for EoR markers */ - return; - - /* make route decision */ - LIST_REMOVE(p, rib_l); - prefix_evaluate(NULL, re); - /* destroy all references to other objects */ nexthop_unlink(p); nexthop_unref(p->nexthop); @@ -1310,7 +1582,7 @@ prefix_unlink(struct prefix *p) p->re = NULL; p->pt = NULL; - if (rib_empty(re)) + if (re && rib_empty(re)) rib_remove(re); /* @@ -1319,7 +1591,7 @@ prefix_unlink(struct prefix *p) */ } -/* alloc and bzero new entry. May not fail. */ +/* alloc and zero new entry. May not fail. 
*/ static struct prefix * prefix_alloc(void) { @@ -1430,7 +1702,7 @@ nexthop_runner(void) p = nh->next_prefix; for (j = 0; p != NULL && j < RDE_RUNNER_ROUNDS; j++) { prefix_updateall(p, nh->state, nh->oldstate); - p = LIST_NEXT(p, nexthop_l); + p = LIST_NEXT(p, entry.list.nexthop); } /* prep for next run, if not finished readd to tail of queue */ @@ -1540,22 +1812,21 @@ nexthop_link(struct prefix *p) if (re_rib(p->re)->flags & F_RIB_NOEVALUATE) return; - LIST_INSERT_HEAD(&p->nexthop->prefix_h, p, nexthop_l); + p->flags |= PREFIX_NEXTHOP_LINKED; + LIST_INSERT_HEAD(&p->nexthop->prefix_h, p, entry.list.nexthop); } void nexthop_unlink(struct prefix *p) { - if (p->nexthop == NULL) - return; - - if (re_rib(p->re)->flags & F_RIB_NOEVALUATE) + if (p->nexthop == NULL || (p->flags & PREFIX_NEXTHOP_LINKED) == 0) return; if (p == p->nexthop->next_prefix) - p->nexthop->next_prefix = LIST_NEXT(p, nexthop_l); + p->nexthop->next_prefix = LIST_NEXT(p, entry.list.nexthop); - LIST_REMOVE(p, nexthop_l); + p->flags &= ~PREFIX_NEXTHOP_LINKED; + LIST_REMOVE(p, entry.list.nexthop); } struct nexthop * diff --git a/usr.sbin/bgpd/rde_update.c b/usr.sbin/bgpd/rde_update.c index 6a22a44d337..9ca9ca58c92 100644 --- a/usr.sbin/bgpd/rde_update.c +++ b/usr.sbin/bgpd/rde_update.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_update.c,v 1.119 2019/07/02 12:07:00 claudio Exp $ */ +/* $OpenBSD: rde_update.c,v 1.120 2019/07/17 10:13:26 claudio Exp $ */ /* * Copyright (c) 2004 Claudio Jeker <claudio@openbsd.org> @@ -143,8 +143,7 @@ withdraw: /* withdraw prefix */ pt_getaddr(old->pt, &addr); - if (prefix_withdraw(&ribs[RIB_ADJ_OUT].rib, peer, &addr, - old->pt->prefixlen) == 1) + if (prefix_withdraw(peer, &addr, old->pt->prefixlen) == 1) peer->up_wcnt++; } else { switch (up_test_update(peer, new)) { @@ -165,13 +164,11 @@ withdraw: } pt_getaddr(new->pt, &addr); - if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr, - new->pt->prefixlen, prefix_vstate(new)) != 2) { - /* only send update if path changed */ - 
prefix_update(&ribs[RIB_ADJ_OUT].rib, peer, &addr, - new->pt->prefixlen); + + /* only send update if path changed */ + if (prefix_update(peer, &state, &addr, new->pt->prefixlen, + prefix_vstate(new)) == 1) peer->up_nlricnt++; - } rde_filterstate_clean(&state); } @@ -229,11 +226,8 @@ up_generate_default(struct filter_head *rules, struct rde_peer *peer, return; } - if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr, 0, - ROA_NOTFOUND) != 2) { - prefix_update(&ribs[RIB_ADJ_OUT].rib, peer, &addr, 0); + if (prefix_update(peer, &state, &addr, 0, ROA_NOTFOUND) == 1) peer->up_nlricnt++; - } /* no longer needed */ rde_filterstate_clean(&state); @@ -576,8 +570,13 @@ up_is_eor(struct rde_peer *peer, u_int8_t aid) p = RB_MIN(prefix_tree, &peer->updates[aid]); if (p != NULL && p->eor) { + /* + * Need to remove eor from update tree because + * prefix_adjout_destroy() can't handle that. + */ RB_REMOVE(prefix_tree, &peer->updates[aid], p); - prefix_destroy(p); + p->flags &= ~PREFIX_FLAG_MASK; + prefix_adjout_destroy(p); return 1; } return 0; @@ -616,11 +615,11 @@ up_dump_prefix(u_char *buf, int len, struct prefix_tree *prefix_head, /* prefix sent, remove from list and clear flag */ RB_REMOVE(prefix_tree, prefix_head, p); - p->flags = 0; + p->flags &= ~PREFIX_FLAG_MASK; if (withdraw) { /* prefix no longer needed, remove it */ - prefix_destroy(p); + prefix_adjout_destroy(p); peer->up_wcnt--; peer->prefix_sent_withdraw++; } else { |