From 519ec0368c242d9324c19a3161017fd6c9a17579 Mon Sep 17 00:00:00 2001
From: Claudio Jeker
Date: Wed, 24 Oct 2018 08:26:38 +0000
Subject: Major refactoring of the RIB handling code.

Mainly change how the RIB is walked. rib_dump_r() is now an internal
function; instead, the code gets an additional callback for throttling
the rib_dump code. This removes a lot of similar code that was only
there to make sure the RDE is not walking too fast, and replaces it
with simpler callbacks.

The other big change is the removal of struct rib pointers in other
data structures. The rib pointers are not stable because of a realloc()
call that happens when the array is extended, so the RIB ID is used as
a reference instead.

Tested and OK denis@ and benno@
---
 usr.sbin/bgpd/mrt.c        |   6 +-
 usr.sbin/bgpd/rde.c        | 308 +++++++++++++++++++--------------------
 usr.sbin/bgpd/rde.h        |  99 +++++++--------
 usr.sbin/bgpd/rde_rib.c    | 206 ++++++++++++++++++++++--------
 usr.sbin/bgpd/rde_update.c |   6 +-
 usr.sbin/bgpd/session.h    |   4 +-
 6 files changed, 338 insertions(+), 291 deletions(-)

diff --git a/usr.sbin/bgpd/mrt.c b/usr.sbin/bgpd/mrt.c
index 0f606eb91be..7c7f2193db3 100644
--- a/usr.sbin/bgpd/mrt.c
+++ b/usr.sbin/bgpd/mrt.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: mrt.c,v 1.86 2018/07/24 10:10:58 claudio Exp $ */
+/* $OpenBSD: mrt.c,v 1.87 2018/10/24 08:26:37 claudio Exp $ */
 
 /*
  * Copyright (c) 2003, 2004 Claudio Jeker
@@ -673,10 +673,8 @@ mrt_dump_upcall(struct rib_entry *re, void *ptr)
 }
 
 void
-mrt_done(void *ptr)
+mrt_done(struct mrt *mrtbuf)
 {
-	struct mrt *mrtbuf = ptr;
-
 	mrtbuf->state = MRT_STATE_REMOVE;
 }
 
diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c
index 7c51d0aa696..710995346d0 100644
--- a/usr.sbin/bgpd/rde.c
+++ b/usr.sbin/bgpd/rde.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: rde.c,v 1.439 2018/10/24 08:18:14 claudio Exp $ */
+/* $OpenBSD: rde.c,v 1.440 2018/10/24 08:26:37 claudio Exp $ */
 
 /*
  * Copyright (c) 2003, 2004 Henning Brauer
@@ -72,23 +72,15 @@ void rde_reflector(struct rde_peer *, struct rde_aspath *);
 void rde_dump_ctx_new(struct ctl_show_rib_request *, pid_t, enum imsg_type);
 void rde_dump_ctx_throttle(pid_t pid, int throttle);
-void rde_dump_runner(void);
-int rde_dump_pending(void);
-void rde_dump_done(void *);
 void rde_dump_mrt_new(struct mrt *, pid_t, int);
-void rde_dump_rib_free(struct rib *);
-void rde_dump_mrt_free(struct rib *);
-void rde_rib_free(struct rib_desc *);
 int rde_rdomain_import(struct rde_aspath *, struct rdomain *);
 void rde_reload_done(void);
-static void rde_reload_runner(void);
-static void rde_softreconfig_in_done(void *);
-static void rde_softreconfig_out_done(void *);
+static void rde_softreconfig_in_done(void *, u_int8_t);
+static void rde_softreconfig_out_done(void *, u_int8_t);
 static void rde_softreconfig_done(void);
 static void rde_softreconfig_out(struct rib_entry *, void *);
 static void rde_softreconfig_in(struct rib_entry *, void *);
-void rde_up_dump_upcall(struct rib_entry *, void *);
 void rde_update_queue_runner(void);
 void rde_update6_queue_runner(u_int8_t);
 struct rde_prefixset *rde_find_prefixset(char *, struct rde_prefixset_head *);
@@ -138,7 +130,6 @@ int softreconfig;
 
 struct rde_dump_ctx {
 	LIST_ENTRY(rde_dump_ctx)	entry;
-	struct rib_context		ribctx;
 	struct ctl_show_rib_request	req;
 	sa_family_t			af;
 	u_int8_t			throttled;
@@ -148,7 +139,6 @@ LIST_HEAD(, rde_dump_ctx) rde_dump_h = LIST_HEAD_INITIALIZER(rde_dump_h);
 
 struct rde_mrt_ctx {
 	LIST_ENTRY(rde_mrt_ctx)	entry;
-	struct rib_context	ribctx;
 	struct mrt		mrt;
 };
 
@@ -264,20 +254,13 @@ rde_main(int debug, int verbose)
set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se); set_pollfd(&pfd[PFD_PIPE_SESSION_CTL], ibuf_se_ctl); - if (rde_dump_pending() && - ibuf_se_ctl && ibuf_se_ctl->w.queued == 0) - timeout = 0; - if (softreconfig) + if (rib_dump_pending()) timeout = 0; i = PFD_PIPE_COUNT; for (mctx = LIST_FIRST(&rde_mrts); mctx != 0; mctx = xmctx) { xmctx = LIST_NEXT(mctx, entry); - if (mctx->mrt.state != MRT_STATE_REMOVE && - mctx->mrt.wbuf.queued == 0) - rib_dump_r(&mctx->ribctx); - if (mctx->mrt.wbuf.queued) { pfd[i].fd = mctx->mrt.wbuf.fd; pfd[i].events = POLLOUT; @@ -331,11 +314,7 @@ rde_main(int debug, int verbose) for (aid = AID_INET6; aid < AID_MAX; aid++) rde_update6_queue_runner(aid); } - if (rde_dump_pending() && - ibuf_se_ctl && ibuf_se_ctl->w.queued <= 10) - rde_dump_runner(); - if (softreconfig) - rde_reload_runner(); + rib_dump_runner(); } /* do not clean up on shutdown on production, it takes ages. */ @@ -803,7 +782,7 @@ rde_dispatch_imsg_parent(struct imsgbuf *ibuf) sizeof(struct rde_rib)) fatalx("IMSG_RECONF_RIB bad len"); memcpy(&rn, imsg.data, sizeof(rn)); - rib = rib_find(rn.name); + rib = rib_byid(rib_find(rn.name)); if (rib == NULL) rib = rib_new(rn.name, rn.rtableid, rn.flags); else if (rib->rtableid != rn.rtableid || @@ -819,7 +798,7 @@ rde_dispatch_imsg_parent(struct imsgbuf *ibuf) */ in_rules = ribd->in_rules; ribd->in_rules = NULL; - rde_rib_free(ribd); + rib_free(rib); rib = rib_new(rn.name, rn.rtableid, rn.flags); ribd->in_rules = in_rules; } else @@ -862,7 +841,7 @@ rde_dispatch_imsg_parent(struct imsgbuf *ibuf) } } TAILQ_INIT(&r->set); - if ((rib = rib_find(r->rib)) == NULL) { + if ((rib = rib_byid(rib_find(r->rib))) == NULL) { log_warnx("IMSG_RECONF_FILTER: filter rule " "for nonexistent rib %s", r->rib); parent_set = NULL; @@ -2320,17 +2299,37 @@ rde_dump_prefix_upcall(struct rib_entry *re, void *ptr) rde_dump_filter(p, &ctx->req); } +static int +rde_dump_throttled(void *arg) +{ + struct rde_dump_ctx *ctx = arg; + + return (ctx->throttled != 0); +} + +static void +rde_dump_done(void *arg, u_int8_t aid) +{ + struct rde_dump_ctx *ctx = arg; + + imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, + -1, NULL, 0); + LIST_REMOVE(ctx, entry); + free(ctx); +} + void rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid, enum imsg_type type) { struct rde_dump_ctx *ctx; - struct rib *rib; struct rib_entry *re; u_int error; u_int8_t hostplen; + u_int16_t rid; if ((ctx = calloc(1, sizeof(*ctx))) == NULL) { + nomem: log_warn("rde_dump_ctx_new"); error = CTL_RES_NOMEM; imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error, @@ -2338,8 +2337,8 @@ rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid, return; } if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) { - rib = &ribs[RIB_ADJ_IN].rib; - } else if ((rib = rib_find(req->rib)) == NULL) { + rid = RIB_ADJ_IN; + } else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) { log_warnx("rde_dump_ctx_new: no such rib %s", req->rib); error = CTL_RES_NOSUCHPEER; imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error, @@ -2351,22 +2350,28 @@ rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid, memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request)); ctx->req.pid = pid; ctx->req.type = type; - ctx->ribctx.ctx_count = CTL_MSG_HIGH_MARK; - ctx->ribctx.ctx_rib = rib; switch (ctx->req.type) { case IMSG_CTL_SHOW_NETWORK: - ctx->ribctx.ctx_upcall = network_dump_upcall; + if (rib_dump_new(rid, ctx->req.aid, CTL_MSG_HIGH_MARK, ctx, + network_dump_upcall, rde_dump_done, + rde_dump_throttled) == -1) + goto nomem; break; 
case IMSG_CTL_SHOW_RIB: case IMSG_CTL_SHOW_RIB_AS: case IMSG_CTL_SHOW_RIB_COMMUNITY: case IMSG_CTL_SHOW_RIB_EXTCOMMUNITY: case IMSG_CTL_SHOW_RIB_LARGECOMMUNITY: - ctx->ribctx.ctx_upcall = rde_dump_upcall; + if (rib_dump_new(rid, ctx->req.aid, CTL_MSG_HIGH_MARK, ctx, + rde_dump_upcall, rde_dump_done, rde_dump_throttled) == -1) + goto nomem; break; case IMSG_CTL_SHOW_RIB_PREFIX: if (req->flags & F_LONGER) { - ctx->ribctx.ctx_upcall = rde_dump_prefix_upcall; + if (rib_dump_new(rid, ctx->req.aid, + CTL_MSG_HIGH_MARK, ctx, rde_dump_prefix_upcall, + rde_dump_done, rde_dump_throttled) == -1) + goto nomem; break; } switch (req->prefix.aid) { @@ -2381,9 +2386,10 @@ rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid, fatalx("rde_dump_ctx_new: unknown af"); } if (req->prefixlen == hostplen) - re = rib_lookup(rib, &req->prefix); + re = rib_lookup(rib_byid(rid), &req->prefix); else - re = rib_get(rib, &req->prefix, req->prefixlen); + re = rib_get(rib_byid(rid), &req->prefix, + req->prefixlen); if (re) rde_dump_upcall(re, ctx); imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, @@ -2393,11 +2399,7 @@ rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid, default: fatalx("rde_dump_ctx_new: unsupported imsg type"); } - ctx->ribctx.ctx_done = rde_dump_done; - ctx->ribctx.ctx_arg = ctx; - ctx->ribctx.ctx_aid = ctx->req.aid; LIST_INSERT_HEAD(&rde_dump_h, ctx, entry); - rib_dump_r(&ctx->ribctx); } void @@ -2413,59 +2415,25 @@ rde_dump_ctx_throttle(pid_t pid, int throttle) } } -void -rde_dump_runner(void) +static int +rde_mrt_throttled(void *arg) { - struct rde_dump_ctx *ctx, *next; + struct mrt *mrt = arg; - for (ctx = LIST_FIRST(&rde_dump_h); ctx != NULL; ctx = next) { - next = LIST_NEXT(ctx, entry); - if (!ctx->throttled) - rib_dump_r(&ctx->ribctx); - } + return (mrt->wbuf.queued > SESS_MSG_LOW_MARK); } -int -rde_dump_pending(void) -{ - struct rde_dump_ctx *ctx; - - /* return true if there is at least one unthrottled context */ - LIST_FOREACH(ctx, &rde_dump_h, entry) - if (!ctx->throttled) - return (1); - - return (0); -} - -void -rde_dump_done(void *arg) -{ - struct rde_dump_ctx *ctx = arg; - - imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, - -1, NULL, 0); - LIST_REMOVE(ctx, entry); - free(ctx); -} - -void -rde_dump_rib_free(struct rib *rib) +static void +rde_mrt_done(void *ptr, u_int8_t aid) { - struct rde_dump_ctx *ctx, *next; - - for (ctx = LIST_FIRST(&rde_dump_h); ctx != NULL; ctx = next) { - next = LIST_NEXT(ctx, entry); - if (ctx->ribctx.ctx_rib == rib) - rde_dump_done(ctx); - } + mrt_done(ptr); } void rde_dump_mrt_new(struct mrt *mrt, pid_t pid, int fd) { - struct rde_mrt_ctx *ctx; - struct rib *rib; + struct rde_mrt_ctx *ctx; + u_int16_t rid; if ((ctx = calloc(1, sizeof(*ctx))) == NULL) { log_warn("rde_dump_mrt_new"); @@ -2475,8 +2443,8 @@ rde_dump_mrt_new(struct mrt *mrt, pid_t pid, int fd) TAILQ_INIT(&ctx->mrt.wbuf.bufs); ctx->mrt.wbuf.fd = fd; ctx->mrt.state = MRT_STATE_RUNNING; - rib = rib_find(ctx->mrt.rib); - if (rib == NULL) { + rid = rib_find(ctx->mrt.rib); + if (rid == RIB_NOTFOUND) { log_warnx("non existing RIB %s for mrt dump", ctx->mrt.rib); free(ctx); return; @@ -2485,37 +2453,12 @@ rde_dump_mrt_new(struct mrt *mrt, pid_t pid, int fd) if (ctx->mrt.type == MRT_TABLE_DUMP_V2) mrt_dump_v2_hdr(&ctx->mrt, conf, &peerlist); - ctx->ribctx.ctx_count = CTL_MSG_HIGH_MARK; - ctx->ribctx.ctx_rib = rib; - ctx->ribctx.ctx_upcall = mrt_dump_upcall; - ctx->ribctx.ctx_done = mrt_done; - ctx->ribctx.ctx_arg = &ctx->mrt; - ctx->ribctx.ctx_aid = AID_UNSPEC; + if 
(rib_dump_new(rid, AID_UNSPEC, CTL_MSG_HIGH_MARK, &ctx->mrt, + mrt_dump_upcall, rde_mrt_done, rde_mrt_throttled) == -1) + fatal("%s: rib_dump_new", __func__); + LIST_INSERT_HEAD(&rde_mrts, ctx, entry); rde_mrt_cnt++; - rib_dump_r(&ctx->ribctx); -} - -void -rde_dump_mrt_free(struct rib *rib) -{ - struct rde_mrt_ctx *ctx, *next; - - for (ctx = LIST_FIRST(&rde_mrts); ctx != NULL; ctx = next) { - next = LIST_NEXT(ctx, entry); - if (ctx->ribctx.ctx_rib == rib) - mrt_done(&ctx->mrt); - } -} - -void -rde_rib_free(struct rib_desc *rd) -{ - /* abort pending rib_dumps */ - rde_dump_rib_free(&rd->rib); - rde_dump_mrt_free(&rd->rib); - - rib_free(&rd->rib); } /* @@ -2622,7 +2565,7 @@ rde_generate_updates(struct rib *rib, struct prefix *new, struct prefix *old) LIST_FOREACH(peer, &peerlist, peer_l) { if (peer->conf.id == 0) continue; - if (peer->rib != rib) + if (peer->loc_rib_id != rib->id) continue; if (peer->state != PEER_UP) continue; @@ -2630,20 +2573,30 @@ rde_generate_updates(struct rib *rib, struct prefix *new, struct prefix *old) } } -u_char queue_buf[4096]; - -void +static void rde_up_dump_upcall(struct rib_entry *re, void *ptr) { struct rde_peer *peer = ptr; - if (re_rib(re) != peer->rib) - fatalx("King Bula: monstrous evil horror."); + if (re->rib_id != peer->loc_rib_id) + fatalx("%s: Unexpected RIB %u != %u.", __func__, re->rib_id, + peer->loc_rib_id); if (re->active == NULL) return; up_generate_updates(out_rules, peer, re->active, NULL); } +static void +rde_up_dump_done(void *ptr, u_int8_t aid) +{ + struct rde_peer *peer = ptr; + + if (peer->capa.grestart.restart) + up_generate_marker(peer, aid); +} + +u_char queue_buf[4096]; + void rde_update_queue_runner(void) { @@ -2845,7 +2798,6 @@ rde_reload_done(void) struct rdomain *rd; struct rde_peer *peer; struct filter_head *fh; - struct rib_context *ctx; u_int16_t rid; int reload = 0; @@ -2931,10 +2883,13 @@ rde_reload_done(void) continue; peer->reconf_out = 0; peer->reconf_rib = 0; - if (peer->rib != rib_find(peer->conf.rib)) { + if (peer->loc_rib_id != rib_find(peer->conf.rib)) { + char *p = log_fmt_peer(&peer->conf); + log_debug("rib change: reloading peer %s", p); + free(p); up_withdraw_all(peer); - peer->rib = rib_find(peer->conf.rib); - if (peer->rib == NULL) + peer->loc_rib_id = rib_find(peer->conf.rib); + if (peer->loc_rib_id == RIB_NOTFOUND) fatalx("King Bula's peer met an unknown RIB"); peer->reconf_rib = 1; continue; @@ -2959,7 +2914,7 @@ rde_reload_done(void) switch (ribs[rid].state) { case RECONF_DELETE: - rde_rib_free(&ribs[rid]); + rib_free(&ribs[rid].rib); break; case RECONF_KEEP: if (rde_filter_equal(ribs[rid].in_rules, @@ -2986,48 +2941,37 @@ rde_reload_done(void) } log_info("RDE reconfigured"); - softreconfig++; + softreconfig = 0; if (reload > 0) { - ctx = &ribs[RIB_ADJ_IN].ribctx; - memset(ctx, 0, sizeof(*ctx)); - ctx->ctx_rib = &ribs[RIB_ADJ_IN].rib; - ctx->ctx_arg = &ribs[RIB_ADJ_IN]; - ctx->ctx_upcall = rde_softreconfig_in; - ctx->ctx_done = rde_softreconfig_in_done; - ctx->ctx_aid = AID_UNSPEC; - ctx->ctx_count = RDE_RUNNER_ROUNDS; - ribs[RIB_ADJ_IN].dumping = 1; log_info("running softreconfig in"); + softreconfig++; + if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC, + RDE_RUNNER_ROUNDS, &ribs[RIB_ADJ_IN], rde_softreconfig_in, + rde_softreconfig_in_done, NULL) == -1) + fatal("%s: rib_dump_new", __func__); } else { - rde_softreconfig_in_done(&ribs[RIB_ADJ_IN]); + rde_softreconfig_in_done(NULL, AID_UNSPEC); } } static void -rde_reload_runner(void) +rde_softreconfig_in_done(void *arg, u_int8_t aid) { - u_int16_t rid; - - for (rid 
= 0; rid < rib_size; rid++) { - if (!rib_valid(rid)) - continue; - if (ribs[rid].dumping) - rib_dump_r(&ribs[rid].ribctx); - } -} - -static void -rde_softreconfig_in_done(void *arg) -{ - struct rib_desc *rib = arg; + struct rib_desc *rd = arg; struct rde_peer *peer; u_int16_t rid; - /* Adj-RIB-In run is done */ - softreconfig--; - rib->dumping = 0; + if (rd != NULL) { + softreconfig--; + /* one guy done but other dumps are still running */ + if (softreconfig > 0) + return; + + log_info("softreconfig in done"); + } /* now do the Adj-RIB-Out sync */ + softreconfig = 0; for (rid = 0; rid < rib_size; rid++) { if (!rib_valid(rid)) continue; @@ -3036,27 +2980,26 @@ rde_softreconfig_in_done(void *arg) LIST_FOREACH(peer, &peerlist, peer_l) { if (peer->reconf_out) - ribs[peer->rib->id].state = RECONF_RELOAD; - else if (peer->reconf_rib) + ribs[peer->loc_rib_id].state = RECONF_RELOAD; + else if (peer->reconf_rib) { + u_int8_t i; + /* dump the full table to neighbors that changed rib */ - peer_dump(peer->conf.id, AID_UNSPEC); + for (i = 0; i < AID_MAX; i++) { + if (peer->capa.mp[i]) + peer_dump(peer->conf.id, i); + } + } } for (rid = 0; rid < rib_size; rid++) { if (!rib_valid(rid)) continue; if (ribs[rid].state == RECONF_RELOAD) { - struct rib_context *ctx; - - ctx = &ribs[rid].ribctx; - memset(ctx, 0, sizeof(*ctx)); - ctx->ctx_rib = &ribs[rid].rib; - ctx->ctx_arg = &ribs[rid]; - ctx->ctx_upcall = rde_softreconfig_out; - ctx->ctx_done = rde_softreconfig_out_done; - ctx->ctx_aid = AID_UNSPEC; - ctx->ctx_count = RDE_RUNNER_ROUNDS; - ribs[rid].dumping = 1; + if (rib_dump_new(rid, AID_UNSPEC, RDE_RUNNER_ROUNDS, + &ribs[rid], rde_softreconfig_out, + rde_softreconfig_out_done, NULL) == -1) + fatal("%s: rib_dump_new", __func__); softreconfig++; log_info("starting softreconfig out for rib %s", ribs[rid].name); @@ -3069,13 +3012,12 @@ rde_softreconfig_in_done(void *arg) } static void -rde_softreconfig_out_done(void *arg) +rde_softreconfig_out_done(void *arg, u_int8_t aid) { struct rib_desc *rib = arg; /* this RIB dump is done */ softreconfig--; - rib->dumping = 0; log_info("softreconfig out done for %s", rib->name); /* but other dumps are still running */ @@ -3225,7 +3167,7 @@ rde_softreconfig_out(struct rib_entry *re, void *bula) return; LIST_FOREACH(peer, &peerlist, peer_l) { - if (peer->rib == re_rib(re) && peer->reconf_out) + if (peer->loc_rib_id == re->rib_id && peer->reconf_out) rde_softreconfig_out_peer(re, peer); } } @@ -3345,8 +3287,8 @@ peer_add(u_int32_t id, struct peer_config *p_conf) TAILQ_INIT(&peer->path_h); memcpy(&peer->conf, p_conf, sizeof(struct peer_config)); peer->remote_bgpid = 0; - peer->rib = rib_find(peer->conf.rib); - if (peer->rib == NULL) + peer->loc_rib_id = rib_find(peer->conf.rib); + if (peer->loc_rib_id == RIB_NOTFOUND) fatalx("King Bula's new peer met an unknown RIB"); peer->state = PEER_NONE; up_init(peer); @@ -3480,6 +3422,8 @@ peer_down(u_int32_t id) peer->remote_bgpid = 0; peer->state = PEER_DOWN; up_down(peer); + /* stop all pending dumps which may depend on this peer */ + rib_dump_terminate(peer->loc_rib_id, peer, rde_up_dump_upcall); /* walk through per peer RIB list and remove all prefixes. 
*/ for (asp = TAILQ_FIRST(&peer->path_h); asp != NULL; asp = nasp) { @@ -3561,14 +3505,18 @@ peer_dump(u_int32_t id, u_int8_t aid) } if (peer->conf.export_type == EXPORT_NONE) { - /* nothing */; + /* nothing to send apart from the marker */ + if (peer->capa.grestart.restart) + up_generate_marker(peer, aid); } else if (peer->conf.export_type == EXPORT_DEFAULT_ROUTE) { up_generate_default(out_rules, peer, aid); + if (peer->capa.grestart.restart) + up_generate_marker(peer, aid); } else { - rib_dump(peer->rib, rde_up_dump_upcall, peer, aid); + if (rib_dump_new(peer->loc_rib_id, aid, RDE_RUNNER_ROUNDS, peer, + rde_up_dump_upcall, rde_up_dump_done, NULL) == -1) + fatal("%s: rib_dump_new", __func__); } - if (peer->capa.grestart.restart) - up_generate_marker(peer, aid); } /* End-of-RIB marker, RFC 4724 */ diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h index 677cf963365..6e73ff6f1a1 100644 --- a/usr.sbin/bgpd/rde.h +++ b/usr.sbin/bgpd/rde.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.h,v 1.197 2018/10/15 10:44:47 claudio Exp $ */ +/* $OpenBSD: rde.h,v 1.198 2018/10/24 08:26:37 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker and @@ -36,6 +36,38 @@ enum peer_state { PEER_ERR /* error occurred going to PEER_DOWN state */ }; +LIST_HEAD(prefix_list, prefix); +RB_HEAD(rib_tree, rib_entry); + +struct rib_entry { + RB_ENTRY(rib_entry) rib_e; + struct prefix_list prefix_h; + struct prefix *active; /* for fast access */ + struct pt_entry *prefix; + u_int16_t rib_id; + u_int16_t lock; +}; + +struct rib { + struct rib_tree tree; + u_int rtableid; + u_int16_t flags; + u_int16_t id; +}; + +#define RIB_ADJ_IN 0 +#define RIB_ADJ_OUT 1 +#define RIB_LOC_START 2 +#define RIB_NOTFOUND 0xffff + +struct rib_desc { + char name[PEER_DESCR_LEN]; + struct rib rib; + struct filter_head *in_rules; + struct filter_head *in_rules_tmp; + enum reconf_action state; +}; + /* * How do we identify peers between the session handler and the rde? * Currently I assume that we can do that with the neighbor_ip... 
@@ -43,7 +75,6 @@ enum peer_state { LIST_HEAD(rde_peer_head, rde_peer); LIST_HEAD(aspath_list, aspath); LIST_HEAD(attr_list, attr); -LIST_HEAD(prefix_list, prefix); TAILQ_HEAD(prefix_queue, prefix); LIST_HEAD(aspath_head, rde_aspath); TAILQ_HEAD(aspath_queue, rde_aspath); @@ -51,9 +82,6 @@ RB_HEAD(uptree_prefix, update_prefix); RB_HEAD(uptree_attr, update_attr); RB_HEAD(uptree_rib, update_rib); -struct rib_desc; -struct rib; -RB_HEAD(rib_tree, rib_entry); TAILQ_HEAD(uplist_prefix, update_prefix); TAILQ_HEAD(uplist_attr, update_attr); @@ -72,7 +100,6 @@ struct rde_peer { struct uplist_prefix withdraws[AID_MAX]; time_t staletime[AID_MAX]; struct capabilities capa; - struct rib *rib; u_int64_t prefix_rcvd_update; u_int64_t prefix_rcvd_withdraw; u_int64_t prefix_rcvd_eor; @@ -86,6 +113,7 @@ struct rde_peer { u_int32_t up_nlricnt; u_int32_t up_wcnt; enum peer_state state; + u_int16_t loc_rib_id; u_int16_t short_as; u_int16_t mrt_idx; u_int8_t reconf_out; /* out filter changed */ @@ -260,46 +288,6 @@ struct pt_entry_vpn4 { u_int8_t pad2; }; -struct rib_context { - struct rib_entry *ctx_re; - struct rib *ctx_rib; - void (*ctx_upcall)(struct rib_entry *, void *); - void (*ctx_done)(void *); - void (*ctx_wait)(void *); - void *ctx_arg; - unsigned int ctx_count; - u_int8_t ctx_aid; -}; - -struct rib_entry { - RB_ENTRY(rib_entry) rib_e; - struct prefix_list prefix_h; - struct prefix *active; /* for fast access */ - struct pt_entry *prefix; - struct rib *__rib; /* mangled pointer with flags */ -}; - -struct rib { - struct rib_tree tree; - u_int rtableid; - u_int16_t flags; - u_int16_t id; -}; - -struct rib_desc { - char name[PEER_DESCR_LEN]; - struct rib rib; - struct rib_context ribctx; - struct filter_head *in_rules; - struct filter_head *in_rules_tmp; - enum reconf_action state; - u_int8_t dumping; -}; - -#define RIB_ADJ_IN 0 -#define RIB_ADJ_OUT 1 -#define RIB_LOC_START 2 - struct prefix { LIST_ENTRY(prefix) rib_l, nexthop_l; TAILQ_ENTRY(prefix) path_l; @@ -330,7 +318,6 @@ extern struct rde_memstats rdemem; int mrt_dump_v2_hdr(struct mrt *, struct bgpd_config *, struct rde_peer_head *); void mrt_dump_upcall(struct rib_entry *, void *); -void mrt_done(void *); /* rde.c */ void rde_send_kroute(struct rib *, struct prefix *, struct prefix *); @@ -454,25 +441,31 @@ extern u_int16_t rib_size; extern struct rib_desc *ribs; struct rib *rib_new(char *, u_int, u_int16_t); -struct rib *rib_find(char *); +struct rib *rib_byid(u_int16_t); +u_int16_t rib_find(char *); struct rib_desc *rib_desc(struct rib *); void rib_free(struct rib *); struct rib_entry *rib_get(struct rib *, struct bgpd_addr *, int); struct rib_entry *rib_lookup(struct rib *, struct bgpd_addr *); -void rib_dump(struct rib *, void (*)(struct rib_entry *, void *), - void *, u_int8_t); -void rib_dump_r(struct rib_context *); +int rib_dump_pending(void); +void rib_dump_runner(void); +int rib_dump_new(u_int16_t, u_int8_t, unsigned int, void *, + void (*)(struct rib_entry *, void *), + void (*)(void *, u_int8_t), + int (*)(void *)); +void rib_dump_terminate(u_int16_t, void *, + void (*)(struct rib_entry *, void *)); static inline struct rib * re_rib(struct rib_entry *re) { - return (struct rib *)((intptr_t)re->__rib & ~1); + return rib_byid(re->rib_id); } static inline int rib_valid(u_int16_t rid) { - if (rid >= rib_size || *ribs[rid].name == '\0') + if (rid == RIB_NOTFOUND || rid >= rib_size || *ribs[rid].name == '\0') return 0; return 1; } diff --git a/usr.sbin/bgpd/rde_rib.c b/usr.sbin/bgpd/rde_rib.c index f6200a7641a..75c4b6122e3 100644 --- 
a/usr.sbin/bgpd/rde_rib.c +++ b/usr.sbin/bgpd/rde_rib.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_rib.c,v 1.179 2018/09/29 08:11:11 claudio Exp $ */ +/* $OpenBSD: rde_rib.c,v 1.180 2018/10/24 08:26:37 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker @@ -40,36 +40,56 @@ u_int16_t rib_size; struct rib_desc *ribs; struct rib_entry *rib_add(struct rib *, struct bgpd_addr *, int); -int rib_compare(const struct rib_entry *, const struct rib_entry *); +static inline int rib_compare(const struct rib_entry *, + const struct rib_entry *); void rib_remove(struct rib_entry *); int rib_empty(struct rib_entry *); -struct rib_entry *rib_restart(struct rib_context *); +static void rib_dump_abort(u_int16_t); RB_PROTOTYPE(rib_tree, rib_entry, rib_e, rib_compare); RB_GENERATE(rib_tree, rib_entry, rib_e, rib_compare); +struct rib_context { + LIST_ENTRY(rib_context) entry; + struct rib_entry *ctx_re; + u_int16_t ctx_rib_id; + void (*ctx_upcall)(struct rib_entry *, void *); + void (*ctx_done)(void *, u_int8_t); + int (*ctx_throttle)(void *); + void *ctx_arg; + unsigned int ctx_count; + u_int8_t ctx_aid; +}; +LIST_HEAD(, rib_context) rib_dumps = LIST_HEAD_INITIALIZER(rib_dumps); + static int prefix_add(struct bgpd_addr *, int, struct rib *, struct rde_peer *, struct rde_aspath *, struct filterstate *, u_int8_t); static int prefix_move(struct prefix *, struct rde_peer *, struct rde_aspath *, struct filterstate *, u_int8_t); -static inline void +static inline struct rib_entry * re_lock(struct rib_entry *re) { - re->__rib = (struct rib *)((intptr_t)re->__rib | 1); + if (re->lock != 0) + log_warnx("%s: entry already locked", __func__); + re->lock = 1; + return re; } -static inline void +static inline struct rib_entry * re_unlock(struct rib_entry *re) { - re->__rib = (struct rib *)((intptr_t)re->__rib & ~1); + if (re->lock == 0) + log_warnx("%s: entry already unlocked", __func__); + re->lock = 0; + return re; } static inline int re_is_locked(struct rib_entry *re) { - return ((intptr_t)re->__rib & 1); + return (re->lock != 0); } static inline struct rib_tree * @@ -78,6 +98,12 @@ rib_tree(struct rib *rib) return (&rib->tree); } +static inline int +rib_compare(const struct rib_entry *a, const struct rib_entry *b) +{ + return (pt_prefix_cmp(a->prefix, b->prefix)); +} + /* RIB specific functions */ struct rib * rib_new(char *name, u_int rtableid, u_int16_t flags) @@ -94,7 +120,7 @@ rib_new(char *name, u_int rtableid, u_int16_t flags) if ((xribs = reallocarray(ribs, id + 1, sizeof(struct rib_desc))) == NULL) { /* XXX this is not clever */ - fatal("rib_add"); + fatal(NULL); } ribs = xribs; rib_size = id + 1; @@ -113,24 +139,33 @@ rib_new(char *name, u_int rtableid, u_int16_t flags) fatal(NULL); TAILQ_INIT(ribs[id].in_rules); + log_debug("%s: %s -> %u", __func__, name, id); return (&ribs[id].rib); } struct rib * +rib_byid(u_int16_t rid) +{ + if (rib_valid(rid)) + return &ribs[rid].rib; + return NULL; +} + +u_int16_t rib_find(char *name) { u_int16_t id; /* no name returns the first Loc-RIB */ if (name == NULL || *name == '\0') - return (&ribs[RIB_LOC_START].rib); + return RIB_LOC_START; for (id = 0; id < rib_size; id++) { if (!strcmp(ribs[id].name, name)) - return (&ribs[id].rib); + return id; } - return (NULL); + return RIB_NOTFOUND; } struct rib_desc * @@ -146,6 +181,8 @@ rib_free(struct rib *rib) struct rib_entry *re, *xre; struct prefix *p, *np; + rib_dump_abort(rib->id); + for (re = RB_MIN(rib_tree, rib_tree(rib)); re != NULL; re = xre) { xre = RB_NEXT(rib_tree, rib_tree(rib), re); @@ -177,23 +214,21 @@ 
rib_free(struct rib *rib) bzero(rd, sizeof(struct rib_desc)); } -int -rib_compare(const struct rib_entry *a, const struct rib_entry *b) -{ - return (pt_prefix_cmp(a->prefix, b->prefix)); -} - struct rib_entry * rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen) { - struct rib_entry xre; + struct rib_entry xre, *re; struct pt_entry *pte; pte = pt_fill(prefix, prefixlen); bzero(&xre, sizeof(xre)); xre.prefix = pte; - return (RB_FIND(rib_tree, rib_tree(rib), &xre)); + re = RB_FIND(rib_tree, rib_tree(rib), &xre); + if (re && re->rib_id != rib->id) + fatalx("%s: Unexpected RIB %u != %u.", __func__, + re->rib_id, rib->id); + return re; } struct rib_entry * @@ -240,7 +275,7 @@ rib_add(struct rib *rib, struct bgpd_addr *prefix, int prefixlen) LIST_INIT(&re->prefix_h); re->prefix = pte; - re->__rib = rib; + re->rib_id = rib->id; if (RB_INSERT(rib_tree, rib_tree(rib), re) != NULL) { log_warnx("rib_add: insert failed"); @@ -282,33 +317,45 @@ rib_empty(struct rib_entry *re) return LIST_EMPTY(&re->prefix_h); } -void -rib_dump(struct rib *rib, void (*upcall)(struct rib_entry *, void *), - void *arg, u_int8_t aid) +static struct rib_entry * +rib_restart(struct rib_context *ctx) { - struct rib_context *ctx; + struct rib_entry *re; - if ((ctx = calloc(1, sizeof(*ctx))) == NULL) - fatal("rib_dump"); - ctx->ctx_rib = rib; - ctx->ctx_upcall = upcall; - ctx->ctx_arg = arg; - ctx->ctx_aid = aid; - rib_dump_r(ctx); + re = ctx->ctx_re; + re_unlock(re); + + /* find first non empty element */ + while (re && rib_empty(re)) + re = RB_NEXT(rib_tree, unused, re); + + /* free the previously locked rib element if empty */ + if (rib_empty(ctx->ctx_re)) + rib_remove(ctx->ctx_re); + ctx->ctx_re = NULL; + return (re); } -void +static void rib_dump_r(struct rib_context *ctx) { + struct rib *rib; struct rib_entry *re; unsigned int i; + rib = rib_byid(ctx->ctx_rib_id); + if (rib == NULL) + fatalx("%s: rib id %u gone", __func__, ctx->ctx_rib_id); + if (ctx->ctx_re == NULL) - re = RB_MIN(rib_tree, rib_tree(ctx->ctx_rib)); + re = RB_MIN(rib_tree, rib_tree(rib)); else re = rib_restart(ctx); for (i = 0; re != NULL; re = RB_NEXT(rib_tree, unused, re)) { + if (re->rib_id != ctx->ctx_rib_id) + fatalx("%s: Unexpected RIB %u != %u.", __func__, + re->rib_id, ctx->ctx_rib_id); if (ctx->ctx_aid != AID_UNSPEC && ctx->ctx_aid != re->prefix->aid) continue; @@ -323,28 +370,89 @@ rib_dump_r(struct rib_context *ctx) } if (ctx->ctx_done) - ctx->ctx_done(ctx->ctx_arg); - else + ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid); + LIST_REMOVE(ctx, entry); + free(ctx); +} + +int +rib_dump_pending(void) +{ + struct rib_context *ctx; + + /* return true if at least one context is not throttled */ + LIST_FOREACH(ctx, &rib_dumps, entry) { + if (ctx->ctx_throttle && ctx->ctx_throttle(ctx->ctx_arg)) + continue; + return 1; + } + return 0; +} + +void +rib_dump_runner(void) +{ + struct rib_context *ctx, *next; + + LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) { + if (ctx->ctx_throttle && ctx->ctx_throttle(ctx->ctx_arg)) + continue; + rib_dump_r(ctx); + } +} + +static void +rib_dump_abort(u_int16_t id) +{ + struct rib_context *ctx, *next; + + LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) { + if (id != ctx->ctx_rib_id) + continue; + if (ctx->ctx_done) + ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid); + LIST_REMOVE(ctx, entry); free(ctx); + } } -struct rib_entry * -rib_restart(struct rib_context *ctx) +int +rib_dump_new(u_int16_t id, u_int8_t aid, unsigned int count, void *arg, + void (*upcall)(struct rib_entry *, void *), void (*done)(void *, u_int8_t), + 
    int (*throttle)(void *))
 {
-	struct rib_entry	*re;
+	struct rib_context	*ctx;
 
-	re = ctx->ctx_re;
-	re_unlock(re);
+	if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
+		return -1;
+	ctx->ctx_rib_id = id;
+	ctx->ctx_aid = aid;
+	ctx->ctx_count = count;
+	ctx->ctx_arg = arg;
+	ctx->ctx_upcall = upcall;
+	ctx->ctx_done = done;
+	ctx->ctx_throttle = throttle;
 
-	/* find first non empty element */
-	while (re && rib_empty(re))
-		re = RB_NEXT(rib_tree, unused, re);
+	LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
 
-	/* free the previously locked rib element if empty */
-	if (rib_empty(ctx->ctx_re))
-		rib_remove(ctx->ctx_re);
-	ctx->ctx_re = NULL;
-	return (re);
+	return 0;
+}
+
+void
+rib_dump_terminate(u_int16_t id, void *arg,
+    void (*upcall)(struct rib_entry *, void *))
+{
+	struct rib_context *ctx, *next;
+
+	LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
+		if (id != ctx->ctx_rib_id || ctx->ctx_arg != arg ||
+		    ctx->ctx_upcall != upcall)
+			continue;
+		if (ctx->ctx_done)
+			ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
+		LIST_REMOVE(ctx, entry);
+		free(ctx);
+	}
 }
 
 /* path specific functions */
diff --git a/usr.sbin/bgpd/rde_update.c b/usr.sbin/bgpd/rde_update.c
index f1e35197f1f..013b57f7e44 100644
--- a/usr.sbin/bgpd/rde_update.c
+++ b/usr.sbin/bgpd/rde_update.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: rde_update.c,v 1.101 2018/10/15 10:44:47 claudio Exp $ */
+/* $OpenBSD: rde_update.c,v 1.102 2018/10/24 08:26:37 claudio Exp $ */
 
 /*
  * Copyright (c) 2004 Claudio Jeker
@@ -543,9 +543,9 @@ up_generate_default(struct filter_head *rules, struct rde_peer *peer,
 	bzero(&p, sizeof(p));
 	bzero(&addr, sizeof(addr));
 	addr.aid = aid;
-	re = rib_get(peer->rib, &addr, 0);
+	re = rib_get(rib_byid(peer->loc_rib_id), &addr, 0);
 	if (re == NULL)
-		re = rib_add(peer->rib, &addr, 0);
+		re = rib_add(rib_byid(peer->loc_rib_id), &addr, 0);
 	p.re = re;
 	p.aspath = asp;
 	p.peer = peer;
diff --git a/usr.sbin/bgpd/session.h b/usr.sbin/bgpd/session.h
index aa2e20f8638..17b1b4529ea 100644
--- a/usr.sbin/bgpd/session.h
+++ b/usr.sbin/bgpd/session.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: session.h,v 1.124 2018/09/20 11:06:04 benno Exp $ */
+/* $OpenBSD: session.h,v 1.125 2018/10/24 08:26:37 claudio Exp $ */
 
 /*
  * Copyright (c) 2003, 2004 Henning Brauer
@@ -269,7 +269,7 @@ void	mrt_dump_bgp_msg(struct mrt *, void *, u_int16_t,
 	    struct peer *);
 void	mrt_dump_state(struct mrt *, u_int16_t, u_int16_t,
 	    struct peer *);
-void	mrt_done(void *);
+void	mrt_done(struct mrt *);
 
 /* parse.y */
 int	parse_config(char *, struct bgpd_config *, struct peer **);
-- 
cgit v1.2.3
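
For context, the calling pattern introduced by the rde.h and rde_rib.c hunks
above is: a RIB walk is registered once with rib_dump_new() together with
three callbacks (a per-entry upcall, a done callback that receives the AID,
and an optional throttle check), and the registered contexts are then driven
from the RDE poll loop via rib_dump_pending() and rib_dump_runner(). The
sketch below only illustrates that pattern and is not part of the commit;
struct my_dump_ctx and the my_* functions are invented names, and it assumes
the bgpd RDE build environment ("rde.h" plus the header that defines
CTL_MSG_HIGH_MARK in this tree).

/*
 * Minimal sketch of the callback-based RIB walk added by this commit.
 * Only rib_dump_new()/rib_dump_runner() and the callback signatures are
 * taken from the patch; everything named my_* is hypothetical.
 */
#include <sys/types.h>
#include <stdlib.h>

#include "rde.h"

struct my_dump_ctx {
	int	paused;		/* e.g. set while an output buffer is full */
};

static void
my_upcall(struct rib_entry *re, void *arg)
{
	/* called for every rib_entry visited during the walk */
}

static void
my_done(void *arg, u_int8_t aid)
{
	/* called exactly once when the walk finishes or is aborted */
	free(arg);
}

static int
my_throttle(void *arg)
{
	struct my_dump_ctx *ctx = arg;

	/* non-zero pauses this walk until a later rib_dump_runner() pass */
	return (ctx->paused);
}

static int
my_start_dump(u_int16_t rib_id)
{
	struct my_dump_ctx *ctx;

	if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
		return (-1);
	/* walk rib_id for all AIDs, CTL_MSG_HIGH_MARK entries per pass */
	if (rib_dump_new(rib_id, AID_UNSPEC, CTL_MSG_HIGH_MARK, ctx,
	    my_upcall, my_done, my_throttle) == -1) {
		free(ctx);
		return (-1);
	}
	return (0);
}

The main loop then only needs to call rib_dump_runner() whenever
rib_dump_pending() reports an unthrottled context, which is exactly what the
rde_main() hunk above switches to.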