diff options
author | Claudio Jeker <claudio@cvs.openbsd.org> | 2007-01-26 17:40:50 +0000 |
---|---|---|
committer | Claudio Jeker <claudio@cvs.openbsd.org> | 2007-01-26 17:40:50 +0000 |
commit | 6509f2a92158a5ae0a602a01494b95b9b0c2820a (patch) | |
tree | d9b373292c4f4ebe349facbf75c8bebab22fdbe9 /usr.sbin | |
parent | bebe8c10e5e02a1f829ee1ad4638411fbe2442ec (diff) |
Massive rework of the control imsg flow. Main changes:
- dedicated pipe between the SE and the RDE for control messages
- restartable RB tree dumps in the RDE
- queuing limits both in the SE and RDE
The result is a dramatic decrease of memory consumption on operations like
bgpctl show rib. Previously all messages were first stored in the RDE
then passed to the SE where they got queued in case bgpctl was not fast enough.
Now only a small number of messages is generated and passed to the SE and
the SE has an additional limit instead of acting like an infinite buffer.
Without this the bgpd on bgpd.networx.ch would not survive a single minute.
looks good henning@
Diffstat (limited to 'usr.sbin')
-rw-r--r-- | usr.sbin/bgpd/bgpd.c | 11 | ||||
-rw-r--r-- | usr.sbin/bgpd/bgpd.h | 15 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde.c | 161 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde.h | 18 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde_prefix.c | 107 | ||||
-rw-r--r-- | usr.sbin/bgpd/session.c | 37 | ||||
-rw-r--r-- | usr.sbin/bgpd/session.h | 6 |
7 files changed, 297 insertions, 58 deletions
diff --git a/usr.sbin/bgpd/bgpd.c b/usr.sbin/bgpd/bgpd.c index 710e2c6062c..2ac8bdfb3a4 100644 --- a/usr.sbin/bgpd/bgpd.c +++ b/usr.sbin/bgpd/bgpd.c @@ -1,4 +1,4 @@ -/* $OpenBSD: bgpd.c,v 1.142 2007/01/04 18:38:51 henning Exp $ */ +/* $OpenBSD: bgpd.c,v 1.143 2007/01/26 17:40:48 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -116,6 +116,7 @@ main(int argc, char *argv[]) int pipe_m2s[2]; int pipe_m2r[2]; int pipe_s2r[2]; + int pipe_s2r_c[2]; conffile = CONFFILE; bgpd_process = PROC_MAIN; @@ -205,20 +206,24 @@ main(int argc, char *argv[]) fatal("socketpair"); if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, pipe_s2r) == -1) fatal("socketpair"); + if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, pipe_s2r_c) == -1) + fatal("socketpair"); session_socket_blockmode(pipe_m2s[0], BM_NONBLOCK); session_socket_blockmode(pipe_m2s[1], BM_NONBLOCK); session_socket_blockmode(pipe_m2r[0], BM_NONBLOCK); session_socket_blockmode(pipe_m2r[1], BM_NONBLOCK); session_socket_blockmode(pipe_s2r[0], BM_NONBLOCK); session_socket_blockmode(pipe_s2r[1], BM_NONBLOCK); + session_socket_blockmode(pipe_s2r_c[0], BM_NONBLOCK); + session_socket_blockmode(pipe_s2r_c[1], BM_NONBLOCK); prepare_listeners(&conf); /* fork children */ rde_pid = rde_main(&conf, peer_l, &net_l, rules_l, &mrt_l, - pipe_m2r, pipe_s2r, pipe_m2s, debug); + pipe_m2r, pipe_s2r, pipe_m2s, pipe_s2r_c, debug); io_pid = session_main(&conf, peer_l, &net_l, rules_l, &mrt_l, - pipe_m2s, pipe_s2r, pipe_m2r); + pipe_m2s, pipe_s2r, pipe_m2r, pipe_s2r_c); setproctitle("parent"); diff --git a/usr.sbin/bgpd/bgpd.h b/usr.sbin/bgpd/bgpd.h index 2f962bd7886..c00aae0377f 100644 --- a/usr.sbin/bgpd/bgpd.h +++ b/usr.sbin/bgpd/bgpd.h @@ -1,4 +1,4 @@ -/* $OpenBSD: bgpd.h,v 1.210 2006/12/05 12:08:13 henning Exp $ */ +/* $OpenBSD: bgpd.h,v 1.211 2007/01/26 17:40:49 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -83,6 +83,15 @@ #define F_CTL_ADJ_IN 0x2000 #define F_CTL_ADJ_OUT 
0x4000 +/* + * Limit the number of control messages generated by the RDE and queued in + * session enigine. The RDE limit defines how many imsg are generated in + * on poll round. The if the SE limit is hit the RDE control socket will no + * longer be polled. + */ +#define RDE_RUNNER_ROUNDS 100 +#define SESSION_CTL_QUEUE_MAX 10000 + enum { PROC_MAIN, PROC_SE, @@ -361,7 +370,8 @@ enum ctl_results { CTL_RES_NOSUCHPEER, CTL_RES_DENIED, CTL_RES_NOCAP, - CTL_RES_PARSE_ERROR + CTL_RES_PARSE_ERROR, + CTL_RES_NOMEM }; /* needed for session.h parse prototype */ @@ -529,6 +539,7 @@ struct ctl_show_rib_request { u_int32_t peerid; pid_t pid; u_int16_t flags; + enum imsg_type type; sa_family_t af; u_int8_t prefixlen; }; diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c index a4f8e169345..c5b7ae7ebf9 100644 --- a/usr.sbin/bgpd/rde.c +++ b/usr.sbin/bgpd/rde.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.c,v 1.217 2007/01/24 13:24:51 claudio Exp $ */ +/* $OpenBSD: rde.c,v 1.218 2007/01/26 17:40:49 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -35,9 +35,10 @@ #include "rde.h" #include "session.h" -#define PFD_PIPE_MAIN 0 +#define PFD_PIPE_MAIN 0 #define PFD_PIPE_SESSION 1 -#define PFD_MRT_FILE 2 +#define PFD_PIPE_SESSION_CTL 2 +#define PFD_MRT_FILE 3 void rde_sighdlr(int); void rde_dispatch_imsg_session(struct imsgbuf *); @@ -58,6 +59,7 @@ void rde_update_log(const char *, const struct rde_peer *, const struct bgpd_addr *, const struct bgpd_addr *, u_int8_t); int rde_reflector(struct rde_peer *, struct rde_aspath *); + void rde_dump_rib_as(struct prefix *, struct rde_aspath *,pid_t, int); void rde_dump_filter(struct prefix *, @@ -68,6 +70,10 @@ void rde_dump_upcall(struct pt_entry *, void *); void rde_dump_as(struct ctl_show_rib_request *); void rde_dump_prefix_upcall(struct pt_entry *, void *); void rde_dump_prefix(struct ctl_show_rib_request *); +void rde_dump_ctx_new(struct ctl_show_rib_request *, pid_t, + enum imsg_type); +void 
rde_dump_runner(void); + void rde_up_dump_upcall(struct pt_entry *, void *); void rde_softreconfig_out(struct pt_entry *, void *); void rde_softreconfig_in(struct pt_entry *, void *); @@ -101,10 +107,20 @@ struct rde_peer peerself; struct rde_peer peerdynamic; struct filter_head *rules_l, *newrules; struct imsgbuf *ibuf_se; +struct imsgbuf *ibuf_se_ctl; struct imsgbuf *ibuf_main; struct mrt *mrt; struct rde_memstats rdemem; +struct rde_dump_ctx { + TAILQ_ENTRY(rde_dump_ctx) entry; + struct pt_context ptc; + struct ctl_show_rib_request req; + sa_family_t af; +}; + +TAILQ_HEAD(, rde_dump_ctx) rde_dump_h = TAILQ_HEAD_INITIALIZER(rde_dump_h); + void rde_sighdlr(int sig) { @@ -125,13 +141,13 @@ pid_t rde_main(struct bgpd_config *config, struct peer *peer_l, struct network_head *net_l, struct filter_head *rules, struct mrt_head *mrt_l, int pipe_m2r[2], int pipe_s2r[2], int pipe_m2s[2], - int debug) + int pipe_s2rctl[2], int debug) { pid_t pid; struct passwd *pw; struct peer *p; struct listen_addr *la; - struct pollfd pfd[3]; + struct pollfd pfd[4]; struct filter_rule *f; struct filter_set *set; struct nexthop *nh; @@ -169,15 +185,18 @@ rde_main(struct bgpd_config *config, struct peer *peer_l, signal(SIGPIPE, SIG_IGN); close(pipe_s2r[0]); + close(pipe_s2rctl[0]); close(pipe_m2r[0]); close(pipe_m2s[0]); close(pipe_m2s[1]); /* initialize the RIB structures */ if ((ibuf_se = malloc(sizeof(struct imsgbuf))) == NULL || + (ibuf_se_ctl = malloc(sizeof(struct imsgbuf))) == NULL || (ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) fatal(NULL); imsg_init(ibuf_se, pipe_s2r[1]); + imsg_init(ibuf_se_ctl, pipe_s2rctl[1]); imsg_init(ibuf_main, pipe_m2r[1]); /* peer list, mrt list and listener list are not used in the RDE */ @@ -231,7 +250,12 @@ rde_main(struct bgpd_config *config, struct peer *peer_l, if (ibuf_se->w.queued > 0) pfd[PFD_PIPE_SESSION].events |= POLLOUT; - i = 2; + pfd[PFD_PIPE_SESSION_CTL].fd = ibuf_se_ctl->fd; + pfd[PFD_PIPE_SESSION_CTL].events = POLLIN; + if 
(ibuf_se_ctl->w.queued > 0) + pfd[PFD_PIPE_SESSION_CTL].events |= POLLOUT; + + i = 3; if (mrt && mrt->queued) { pfd[PFD_MRT_FILE].fd = mrt->fd; pfd[PFD_MRT_FILE].events = POLLOUT; @@ -260,6 +284,14 @@ rde_main(struct bgpd_config *config, struct peer *peer_l, if (pfd[PFD_PIPE_SESSION].revents & POLLIN) rde_dispatch_imsg_session(ibuf_se); + if ((pfd[PFD_PIPE_SESSION_CTL].revents & POLLOUT) && + ibuf_se_ctl->w.queued) + if (msgbuf_write(&ibuf_se_ctl->w) < 0) + fatal("pipe write error"); + + if (pfd[PFD_PIPE_SESSION_CTL].revents & POLLIN) + rde_dispatch_imsg_session(ibuf_se_ctl); + if (pfd[PFD_MRT_FILE].revents & POLLOUT) { if (mrt_write(mrt) == -1) { free(mrt); @@ -270,16 +302,18 @@ rde_main(struct bgpd_config *config, struct peer *peer_l, rde_update_queue_runner(); rde_update6_queue_runner(); + if (ibuf_se_ctl->w.queued <= 0) + rde_dump_runner(); } /* do not clean up on shutdown on production, it takes ages. */ if (debug) rde_shutdown(); - msgbuf_write(&ibuf_se->w); msgbuf_clear(&ibuf_se->w); free(ibuf_se); - msgbuf_write(&ibuf_main->w); + msgbuf_clear(&ibuf_se_ctl->w); + free(ibuf_se_ctl); msgbuf_clear(&ibuf_main->w); free(ibuf_main); @@ -302,7 +336,6 @@ rde_dispatch_imsg_session(struct imsgbuf *ibuf) struct ctl_show_rib_request req; struct filter_set *s; struct nexthop *nh; - pid_t pid; int n; sa_family_t af = AF_UNSPEC; @@ -426,11 +459,9 @@ badnet: log_warnx("rde_dispatch: wrong imsg len"); break; } - pid = imsg.hdr.pid; - memcpy(&af, imsg.data, sizeof(af)); - pt_dump(network_dump_upcall, &pid, af); - imsg_compose(ibuf_se, IMSG_CTL_END, 0, pid, -1, - NULL, 0); + bzero(&req, sizeof(req)); + memcpy(&req.af, imsg.data, sizeof(af)); + rde_dump_ctx_new(&req, imsg.hdr.pid, imsg.hdr.type); break; case IMSG_CTL_SHOW_RIB: if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof(req)) { @@ -438,10 +469,7 @@ badnet: break; } memcpy(&req, imsg.data, sizeof(req)); - req.pid = imsg.hdr.pid; - pt_dump(rde_dump_upcall, &req, req.af); - imsg_compose(ibuf_se, IMSG_CTL_END, 0, req.pid, -1, - 
NULL, 0); + rde_dump_ctx_new(&req, imsg.hdr.pid, imsg.hdr.type); break; case IMSG_CTL_SHOW_RIB_AS: if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof(req)) { @@ -451,7 +479,7 @@ badnet: memcpy(&req, imsg.data, sizeof(req)); req.pid = imsg.hdr.pid; rde_dump_as(&req); - imsg_compose(ibuf_se, IMSG_CTL_END, 0, req.pid, -1, + imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, req.pid, -1, NULL, 0); break; case IMSG_CTL_SHOW_RIB_PREFIX: @@ -462,7 +490,7 @@ badnet: memcpy(&req, imsg.data, sizeof(req)); req.pid = imsg.hdr.pid; rde_dump_prefix(&req); - imsg_compose(ibuf_se, IMSG_CTL_END, 0, req.pid, -1, + imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, req.pid, -1, NULL, 0); break; case IMSG_CTL_SHOW_NEIGHBOR: @@ -475,15 +503,15 @@ badnet: peer = peer_get(p.conf.id); if (peer != NULL) p.stats.prefix_cnt = peer->prefix_cnt; - imsg_compose(ibuf_se, IMSG_CTL_SHOW_NEIGHBOR, 0, + imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0, imsg.hdr.pid, -1, &p, sizeof(struct peer)); break; case IMSG_CTL_END: - imsg_compose(ibuf_se, IMSG_CTL_END, 0, imsg.hdr.pid, + imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, imsg.hdr.pid, -1, NULL, 0); break; case IMSG_CTL_SHOW_RIB_MEM: - imsg_compose(ibuf_se, IMSG_CTL_SHOW_RIB_MEM, 0, + imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_MEM, 0, imsg.hdr.pid, -1, &rdemem, sizeof(rdemem)); break; default: @@ -1566,22 +1594,23 @@ rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags) rib.flags |= F_RIB_ELIGIBLE; rib.aspath_len = aspath_length(asp->aspath); - if ((wbuf = imsg_create(ibuf_se, IMSG_CTL_SHOW_RIB, 0, pid, + if ((wbuf = imsg_create(ibuf_se_ctl, IMSG_CTL_SHOW_RIB, 0, pid, sizeof(rib) + rib.aspath_len)) == NULL) return; if (imsg_add(wbuf, &rib, sizeof(rib)) == -1 || imsg_add(wbuf, aspath_dump(asp->aspath), rib.aspath_len) == -1) return; - if (imsg_close(ibuf_se, wbuf) == -1) + if (imsg_close(ibuf_se_ctl, wbuf) == -1) return; if (flags & F_CTL_DETAIL) for (l = 0; l < asp->others_len; l++) { if ((a = asp->others[l]) == NULL) break; - if ((wbuf = 
imsg_create(ibuf_se, IMSG_CTL_SHOW_RIB_ATTR, - 0, pid, attr_optlen(a))) == NULL) + if ((wbuf = imsg_create(ibuf_se_ctl, + IMSG_CTL_SHOW_RIB_ATTR, 0, pid, + attr_optlen(a))) == NULL) return; if ((bp = buf_reserve(wbuf, attr_optlen(a))) == NULL) { buf_free(wbuf); @@ -1592,7 +1621,7 @@ rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags) buf_free(wbuf); return; } - if (imsg_close(ibuf_se, wbuf) == -1) + if (imsg_close(ibuf_se_ctl, wbuf) == -1) return; } } @@ -1712,6 +1741,62 @@ rde_dump_prefix(struct ctl_show_rib_request *req) } } +void +rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid, + enum imsg_type type) +{ + struct rde_dump_ctx *ctx; + u_int error; + + if ((ctx = calloc(1, sizeof(*ctx))) == NULL) { + log_warn("rde_dump_ctx_new"); + error = CTL_RES_NOMEM; + imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error, + sizeof(error)); + return; + } + memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request)); + ctx->req.pid = pid; + ctx->req.type = type; + ctx->ptc.count = RDE_RUNNER_ROUNDS; + ctx->af = ctx->req.af; + if (ctx->af == AF_UNSPEC) + ctx->af = AF_INET; + + TAILQ_INSERT_TAIL(&rde_dump_h, ctx, entry); +} + +void +rde_dump_runner(void) +{ + struct rde_dump_ctx *ctx, *next; + + for(ctx = TAILQ_FIRST(&rde_dump_h); ctx != NULL; ctx = next) { + next = TAILQ_NEXT(ctx, entry); + if (ctx->ptc.done) { + imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, + -1, NULL, 0); + TAILQ_REMOVE(&rde_dump_h, ctx, entry); + free(ctx); + continue; + } + switch (ctx->req.type) { + case IMSG_CTL_SHOW_NETWORK: + pt_dump_r(network_dump_upcall, &ctx->req.pid, + ctx->af, &ctx->ptc); + break; + case IMSG_CTL_SHOW_RIB: + pt_dump_r(rde_dump_upcall, &ctx->req, ctx->af, + &ctx->ptc); + break; + default: + fatalx("rde_dump_runner: unsupported imsg type"); + } + if (ctx->ptc.done && ctx->req.af == AF_UNSPEC) + ctx->af = AF_INET6; + } +} + /* * kroute specific functions */ @@ -1997,7 +2082,7 @@ void rde_update_queue_runner(void) { struct 
rde_peer *peer; - int r, sent; + int r, sent, max = RDE_RUNNER_ROUNDS; u_int16_t len, wd_len, wpos; len = sizeof(queue_buf) - MSGSIZE_HEADER; @@ -2034,7 +2119,8 @@ rde_update_queue_runner(void) fatal("imsg_compose error"); sent++; } - } while (sent != 0); + max -= sent; + } while (sent != 0 && max > 0); } void @@ -2042,7 +2128,7 @@ rde_update6_queue_runner(void) { struct rde_peer *peer; u_char *b; - int sent; + int sent, max = RDE_RUNNER_ROUNDS / 2; u_int16_t len; /* first withdraws ... */ @@ -2062,9 +2148,11 @@ rde_update6_queue_runner(void) fatal("imsg_compose error"); sent++; } - } while (sent != 0); + max -= sent; + } while (sent != 0 && max > 0); /* ... then updates */ + max = RDE_RUNNER_ROUNDS / 2; do { sent = 0; LIST_FOREACH(peer, &peerlist, peer_l) { @@ -2081,7 +2169,8 @@ rde_update6_queue_runner(void) fatal("imsg_compose error"); sent++; } - } while (sent != 0); + max -= sent; + } while (sent != 0 && max > 0); } /* @@ -2476,8 +2565,8 @@ network_dump_upcall(struct pt_entry *pt, void *ptr) k.prefixlen = p->prefix->prefixlen; if (p->aspath->peer == &peerself) k.flags = F_KERNEL; - if (imsg_compose(ibuf_se, IMSG_CTL_SHOW_NETWORK, 0, pid, - -1, &k, sizeof(k)) == -1) + if (imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NETWORK, 0, + pid, -1, &k, sizeof(k)) == -1) log_warnx("network_dump_upcall: " "imsg_compose error"); } @@ -2488,7 +2577,7 @@ network_dump_upcall(struct pt_entry *pt, void *ptr) k6.prefixlen = p->prefix->prefixlen; if (p->aspath->peer == &peerself) k6.flags = F_KERNEL; - if (imsg_compose(ibuf_se, IMSG_CTL_SHOW_NETWORK6, 0, + if (imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NETWORK6, 0, pid, -1, &k6, sizeof(k6)) == -1) log_warnx("network_dump_upcall: " "imsg_compose error"); diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h index 702b3d01abe..0c0c624be40 100644 --- a/usr.sbin/bgpd/rde.h +++ b/usr.sbin/bgpd/rde.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.h,v 1.96 2006/11/10 14:47:32 henning Exp $ */ +/* $OpenBSD: rde.h,v 1.97 2007/01/26 17:40:49 claudio Exp $ */ 
/* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> and @@ -240,6 +240,20 @@ struct pt_entry6 { struct in6_addr prefix6; }; +struct pt_context { + union { + struct pt_entry p; + struct pt_entry4 p4; + struct pt_entry6 p6; + } pu; +#define ctx_p pu.p +#define ctx_p4 pu.p4 +#define ctx_p6 pu.p6 + /* only count and done should be accessed by callers */ + unsigned int count; + int done; +}; + struct prefix { LIST_ENTRY(prefix) prefix_l, path_l; struct rde_aspath *aspath; @@ -370,6 +384,8 @@ void pt_remove(struct pt_entry *); struct pt_entry *pt_lookup(struct bgpd_addr *); void pt_dump(void (*)(struct pt_entry *, void *), void *, sa_family_t); +void pt_dump_r(void (*)(struct pt_entry *, void *), void *, + sa_family_t, struct pt_context *); /* rde_filter.c */ enum filter_actions rde_filter(struct rde_aspath **, struct filter_head *, diff --git a/usr.sbin/bgpd/rde_prefix.c b/usr.sbin/bgpd/rde_prefix.c index 52e61bb031e..335764b5557 100644 --- a/usr.sbin/bgpd/rde_prefix.c +++ b/usr.sbin/bgpd/rde_prefix.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_prefix.c,v 1.22 2006/01/03 22:49:17 claudio Exp $ */ +/* $OpenBSD: rde_prefix.c,v 1.23 2007/01/26 17:40:49 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -40,12 +40,14 @@ * pt_init: initialize prefix table. * pt_alloc?: allocate a AF specific pt_entry. Internal function. * pt_free: free a pt_entry. Internal function. + * pt_restart used to restart a tree walk at the spot it was aborted earlier. 
*/ /* internal prototypes */ static struct pt_entry4 *pt_alloc4(void); static struct pt_entry6 *pt_alloc6(void); static void pt_free(struct pt_entry *); +static struct pt_entry *pt_restart(struct pt_context *); int pt_prefix_cmp(const struct pt_entry *, const struct pt_entry *); @@ -227,14 +229,53 @@ pt_lookup(struct bgpd_addr *prefix) void pt_dump(void (*upcall)(struct pt_entry *, void *), void *arg, sa_family_t af) { - struct pt_entry *p; - if (af == AF_INET || af == AF_UNSPEC) - RB_FOREACH(p, pt_tree, &pttable4) - upcall(p, arg); + pt_dump_r(upcall, arg, AF_INET, NULL); if (af == AF_INET6 || af == AF_UNSPEC) - RB_FOREACH(p, pt_tree, &pttable6) - upcall(p, arg); + pt_dump_r(upcall, arg, AF_INET6, NULL); +} + +void +pt_dump_r(void (*upcall)(struct pt_entry *, void *), void *arg, + sa_family_t af, struct pt_context *ctx) +{ + struct pt_entry *p; + unsigned int i; + + if (ctx == NULL || ctx->ctx_p.af != af) { + switch (af) { + case AF_INET: + p = RB_MIN(pt_tree, &pttable4); + break; + case AF_INET6: + p = RB_MIN(pt_tree, &pttable6); + break; + default: + return; + } + } else + p = pt_restart(ctx); + + for (i = 0; p != NULL; p = RB_NEXT(pt_tree, unused, p)) { + if (ctx && i++ >= ctx->count) { + /* store next start point */ + switch (p->af) { + case AF_INET: + ctx->ctx_p4 = *(struct pt_entry4 *)p; + break; + case AF_INET6: + ctx->ctx_p6 = *(struct pt_entry6 *)p; + break; + default: + fatalx("pt_dump_r: unknown af"); + } + return; + } + upcall(p, arg); + } + + if (ctx) + ctx->done = 1; } int @@ -321,3 +362,55 @@ pt_free(struct pt_entry *pte) free(pte); } +static struct pt_entry * +pt_restart(struct pt_context *ctx) +{ + struct pt_entry *tmp, *prev = NULL; + int comp; + + /* first select correct tree */ + switch (ctx->ctx_p.af) { + case AF_INET: + tmp = RB_ROOT(&pttable4); + break; + case AF_INET6: + tmp = RB_ROOT(&pttable6); + break; + default: + fatalx("pt_restart: unknown af"); + } + + /* then try to find the element */ + while (tmp) { + prev = tmp; + comp = 
pt_prefix_cmp(&ctx->ctx_p, tmp); + if (comp < 0) + tmp = RB_LEFT(tmp, pt_e); + else if (comp > 0) + tmp = RB_RIGHT(tmp, pt_e); + else + return (tmp); + } + + /* no match, empty tree */ + if (prev == NULL) + return (NULL); + + /* + * no perfect match + * if last element was bigger use that as new start point + */ + if (comp < 0) + return (prev); + + /* backtrack until parent is bigger */ + do { + prev = RB_PARENT(prev, pt_e); + if (prev == NULL) + /* all elements in the tree are smaler */ + return (NULL); + comp = pt_prefix_cmp(&ctx->ctx_p, prev); + } while (comp > 0); + + return (prev); +} diff --git a/usr.sbin/bgpd/session.c b/usr.sbin/bgpd/session.c index 941d54fd128..ce3e9851350 100644 --- a/usr.sbin/bgpd/session.c +++ b/usr.sbin/bgpd/session.c @@ -1,4 +1,4 @@ -/* $OpenBSD: session.c,v 1.267 2007/01/23 17:41:22 claudio Exp $ */ +/* $OpenBSD: session.c,v 1.268 2007/01/26 17:40:49 claudio Exp $ */ /* * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> @@ -46,9 +46,10 @@ #define PFD_PIPE_MAIN 0 #define PFD_PIPE_ROUTE 1 -#define PFD_SOCK_CTL 2 -#define PFD_SOCK_RCTL 3 -#define PFD_LISTENERS_START 4 +#define PFD_PIPE_ROUTE_CTL 2 +#define PFD_SOCK_CTL 3 +#define PFD_SOCK_RCTL 4 +#define PFD_LISTENERS_START 5 void session_sighdlr(int); int setup_listeners(u_int *); @@ -101,6 +102,7 @@ int pending_reconf = 0; int csock = -1, rcsock = -1; u_int peer_cnt; struct imsgbuf *ibuf_rde; +struct imsgbuf *ibuf_rde_ctl; struct imsgbuf *ibuf_main; struct mrt_head mrthead; @@ -175,7 +177,8 @@ setup_listeners(u_int *la_cnt) pid_t session_main(struct bgpd_config *config, struct peer *cpeers, struct network_head *net_l, struct filter_head *rules, - struct mrt_head *m_l, int pipe_m2s[2], int pipe_s2r[2], int pipe_m2r[2]) + struct mrt_head *m_l, int pipe_m2s[2], int pipe_s2r[2], int pipe_m2r[2], + int pipe_s2rctl[2]) { int nfds, timeout; unsigned int i, j, idx_peers, idx_listeners, idx_mrts; @@ -184,6 +187,7 @@ session_main(struct bgpd_config *config, struct peer 
*cpeers, u_int pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0; u_int listener_cnt, ctl_cnt, mrt_cnt; u_int new_cnt; + u_int32_t ctl_queued; struct passwd *pw; struct peer *p, **peer_l = NULL, *last, *next; struct network *net; @@ -242,13 +246,16 @@ session_main(struct bgpd_config *config, struct peer *cpeers, log_info("session engine ready"); close(pipe_m2s[0]); close(pipe_s2r[1]); + close(pipe_s2rctl[1]); close(pipe_m2r[0]); close(pipe_m2r[1]); init_conf(conf); if ((ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL || + (ibuf_rde_ctl = malloc(sizeof(struct imsgbuf))) == NULL || (ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) fatal(NULL); imsg_init(ibuf_rde, pipe_s2r[0]); + imsg_init(ibuf_rde_ctl, pipe_s2rctl[0]); imsg_init(ibuf_main, pipe_m2s[1]); TAILQ_INIT(&ctl_conns); control_listen(csock); @@ -371,6 +378,18 @@ session_main(struct bgpd_config *config, struct peer *cpeers, pfd[PFD_PIPE_ROUTE].events = POLLIN; if (ibuf_rde->w.queued > 0) pfd[PFD_PIPE_ROUTE].events |= POLLOUT; + + ctl_queued = 0; + TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) + ctl_queued += ctl_conn->ibuf.w.queued; + + pfd[PFD_PIPE_ROUTE_CTL].fd = ibuf_rde_ctl->fd; + if (ctl_queued < SESSION_CTL_QUEUE_MAX) + /* + * Do not act as unlimited buffer. Don't read in more + * messages if the ctl sockets are getting full. 
+ */ + pfd[PFD_PIPE_ROUTE_CTL].events = POLLIN; pfd[PFD_SOCK_CTL].fd = csock; pfd[PFD_SOCK_CTL].events = POLLIN; pfd[PFD_SOCK_RCTL].fd = rcsock; @@ -496,6 +515,12 @@ session_main(struct bgpd_config *config, struct peer *cpeers, &listener_cnt); } + if (nfds > 0 && pfd[PFD_PIPE_ROUTE_CTL].revents & POLLIN) { + nfds--; + session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL, + &listener_cnt); + } + if (nfds > 0 && pfd[PFD_SOCK_CTL].revents & POLLIN) { nfds--; ctl_cnt += control_accept(csock, 0); @@ -2424,7 +2449,7 @@ session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) case IMSG_CTL_SHOW_NETWORK: case IMSG_CTL_SHOW_NETWORK6: case IMSG_CTL_SHOW_NEIGHBOR: - if (idx != PFD_PIPE_ROUTE) + if (idx != PFD_PIPE_ROUTE_CTL) fatalx("ctl rib request not from RDE"); control_imsg_relay(&imsg); break; diff --git a/usr.sbin/bgpd/session.h b/usr.sbin/bgpd/session.h index 8d95671c438..5a0894b3f09 100644 --- a/usr.sbin/bgpd/session.h +++ b/usr.sbin/bgpd/session.h @@ -1,4 +1,4 @@ -/* $OpenBSD: session.h,v 1.89 2007/01/04 12:43:36 claudio Exp $ */ +/* $OpenBSD: session.h,v 1.90 2007/01/26 17:40:49 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -202,7 +202,7 @@ struct peer *peers; void session_socket_blockmode(int, enum blockmodes); pid_t session_main(struct bgpd_config *, struct peer *, struct network_head *, struct filter_head *, - struct mrt_head *, int[2], int[2], int[2]); + struct mrt_head *, int[2], int[2], int[2], int[2]); void bgp_fsm(struct peer *, enum session_events); int session_neighbor_rrefresh(struct peer *p); struct peer *getpeerbyaddr(struct bgpd_addr *); @@ -229,7 +229,7 @@ void prepare_listeners(struct bgpd_config *); /* rde.c */ pid_t rde_main(struct bgpd_config *, struct peer *, struct network_head *, struct filter_head *, struct mrt_head *, int[2], int[2], int[2], - int); + int[2], int); /* control.c */ int control_init(int, char *); |