diff options
-rw-r--r-- | usr.sbin/bgpd/bgpd.h | 28 | ||||
-rw-r--r-- | usr.sbin/bgpd/parse.y | 6 | ||||
-rw-r--r-- | usr.sbin/bgpd/printconf.c | 4 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde.c | 125 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde.h | 6 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde_rib.c | 32 | ||||
-rw-r--r-- | usr.sbin/bgpd/session.c | 306 | ||||
-rw-r--r-- | usr.sbin/bgpd/session.h | 5 |
8 files changed, 477 insertions, 35 deletions
diff --git a/usr.sbin/bgpd/bgpd.h b/usr.sbin/bgpd/bgpd.h index 68b599ade5c..85736efe262 100644 --- a/usr.sbin/bgpd/bgpd.h +++ b/usr.sbin/bgpd/bgpd.h @@ -1,4 +1,4 @@ -/* $OpenBSD: bgpd.h,v 1.270 2012/05/27 18:52:07 claudio Exp $ */ +/* $OpenBSD: bgpd.h,v 1.271 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -132,6 +132,7 @@ extern const struct aid aid_vals[]; #define AID_INET6 2 #define AID_VPN_IPv4 3 #define AID_MAX 4 +#define AID_MIN 1 /* skip AID_UNSPEC since that is a dummy */ #define AID_VALS { \ /* afi, af, safii, name */ \ @@ -253,12 +254,25 @@ struct peer_auth { }; struct capabilities { - int8_t mp[AID_MAX]; /* multiprotocol extensions, RFC 4760 */ - int8_t refresh; /* route refresh, RFC 2918 */ - int8_t restart; /* graceful restart, RFC 4724 */ - int8_t as4byte; /* draft-ietf-idr-as4bytes-13 */ + struct { + int16_t timeout; /* graceful restart timeout */ + int8_t flags[AID_MAX]; /* graceful restart per AID flags */ + int8_t restart; /* graceful restart, RFC 4724 */ + } grestart; + int8_t mp[AID_MAX]; /* multiprotocol extensions, RFC 4760 */ + int8_t refresh; /* route refresh, RFC 2918 */ + int8_t as4byte; /* 4-byte ASnum, RFC 4893 */ }; +#define CAPA_GR_PRESENT 0x01 +#define CAPA_GR_RESTART 0x02 +#define CAPA_GR_FORWARD 0x04 +#define CAPA_GR_RESTARTING 0x08 + +#define CAPA_GR_TIMEMASK 0x0fff +#define CAPA_GR_R_FLAG 0x8000 +#define CAPA_GR_F_FLAG 0x80 + struct peer_config { struct bgpd_addr remote_addr; struct bgpd_addr local_addr; @@ -373,6 +387,9 @@ enum imsg_type { IMSG_SESSION_ADD, IMSG_SESSION_UP, IMSG_SESSION_DOWN, + IMSG_SESSION_STALE, + IMSG_SESSION_FLUSH, + IMSG_SESSION_RESTARTED, IMSG_MRT_OPEN, IMSG_MRT_REOPEN, IMSG_MRT_CLOSE, @@ -550,6 +567,7 @@ struct ctl_neighbor { #define F_PREF_ACTIVE 0x02 #define F_PREF_INTERNAL 0x04 #define F_PREF_ANNOUNCE 0x08 +#define F_PREF_STALE 0x10 struct ctl_show_rib { struct bgpd_addr true_nexthop; diff --git a/usr.sbin/bgpd/parse.y b/usr.sbin/bgpd/parse.y 
index c712abe001f..108e1ff8c18 100644 --- a/usr.sbin/bgpd/parse.y +++ b/usr.sbin/bgpd/parse.y @@ -1,4 +1,4 @@ -/* $OpenBSD: parse.y,v 1.262 2012/07/13 15:25:37 claudio Exp $ */ +/* $OpenBSD: parse.y,v 1.263 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright (c) 2002, 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -1038,7 +1038,7 @@ peeropts : REMOTEAS as4number { curpeer->conf.capabilities.refresh = $3; } | ANNOUNCE RESTART yesno { - curpeer->conf.capabilities.restart = $3; + curpeer->conf.capabilities.grestart.restart = $3; } | ANNOUNCE AS4BYTE yesno { curpeer->conf.capabilities.as4byte = $3; @@ -3019,7 +3019,7 @@ alloc_peer(void) for (i = 0; i < AID_MAX; i++) p->conf.capabilities.mp[i] = -1; p->conf.capabilities.refresh = 1; - p->conf.capabilities.restart = 0; + p->conf.capabilities.grestart.restart = 0; p->conf.capabilities.as4byte = 1; p->conf.local_as = conf->as; p->conf.local_short_as = conf->short_as; diff --git a/usr.sbin/bgpd/printconf.c b/usr.sbin/bgpd/printconf.c index 6599761c638..88ffb052197 100644 --- a/usr.sbin/bgpd/printconf.c +++ b/usr.sbin/bgpd/printconf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: printconf.c,v 1.86 2011/09/17 16:29:44 claudio Exp $ */ +/* $OpenBSD: printconf.c,v 1.87 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -377,7 +377,7 @@ print_peer(struct peer_config *p, struct bgpd_config *conf, const char *c) printf("%s\tannounce capabilities no\n", c); if (p->capabilities.refresh == 0) printf("%s\tannounce refresh no\n", c); - if (p->capabilities.restart == 1) + if (p->capabilities.grestart.restart == 1) printf("%s\tannounce restart yes\n", c); if (p->capabilities.as4byte == 0) printf("%s\tannounce as4byte no\n", c); diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c index 8459ac67739..cd4456c4a8a 100644 --- a/usr.sbin/bgpd/rde.c +++ b/usr.sbin/bgpd/rde.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.c,v 1.317 2012/08/12 14:24:56 claudio Exp $ */ +/* $OpenBSD: rde.c,v 1.318 2012/09/12 
05:56:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -101,6 +101,9 @@ struct rde_peer *peer_add(u_int32_t, struct peer_config *); struct rde_peer *peer_get(u_int32_t); void peer_up(u_int32_t, struct session_up *); void peer_down(u_int32_t); +void peer_flush(struct rde_peer *, u_int8_t); +void peer_stale(u_int32_t, u_int8_t); +void peer_recv_eor(struct rde_peer *, u_int8_t); void peer_dump(u_int32_t, u_int8_t); void peer_send_eor(struct rde_peer *, u_int8_t); @@ -407,6 +410,47 @@ rde_dispatch_imsg_session(struct imsgbuf *ibuf) case IMSG_SESSION_DOWN: peer_down(imsg.hdr.peerid); break; + case IMSG_SESSION_STALE: + if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { + log_warnx("rde_dispatch: wrong imsg len"); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_STALE: bad AID"); + peer_stale(imsg.hdr.peerid, aid); + break; + case IMSG_SESSION_FLUSH: + if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { + log_warnx("rde_dispatch: wrong imsg len"); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_FLUSH: bad AID"); + if ((peer = peer_get(imsg.hdr.peerid)) == NULL) { + log_warnx("rde_dispatch: unknown peer id %d", + imsg.hdr.peerid); + break; + } + peer_flush(peer, aid); + break; + case IMSG_SESSION_RESTARTED: + if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { + log_warnx("rde_dispatch: wrong imsg len"); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_RESTARTED: bad AID"); + if ((peer = peer_get(imsg.hdr.peerid)) == NULL) { + log_warnx("rde_dispatch: unknown peer id %d", + imsg.hdr.peerid); + break; + } + if (peer->staletime[aid]) + peer_flush(peer, aid); + break; case IMSG_REFRESH: if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { log_warnx("rde_dispatch: wrong imsg len"); @@ -559,10 +603,14 @@ badnet: peer->prefix_rcvd_update; p.stats.prefix_rcvd_withdraw = 
peer->prefix_rcvd_withdraw; + p.stats.prefix_rcvd_eor = + peer->prefix_rcvd_eor; p.stats.prefix_sent_update = peer->prefix_sent_update; p.stats.prefix_sent_withdraw = peer->prefix_sent_withdraw; + p.stats.prefix_sent_eor = + peer->prefix_sent_eor; } imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0, imsg.hdr.pid, -1, &p, sizeof(struct peer)); @@ -1020,6 +1068,10 @@ rde_update_dispatch(struct imsg *imsg) ERR_UPD_ATTRLIST, NULL, 0); return (-1); } + if (withdrawn_len == 0) { + /* EoR marker */ + peer_recv_eor(peer, AID_INET); + } return (0); } @@ -1050,6 +1102,11 @@ rde_update_dispatch(struct imsg *imsg) goto done; } + if ((asp->flags & ~F_ATTR_MP_UNREACH) == 0 && mplen == 0) { + /* EoR marker */ + peer_recv_eor(peer, aid); + } + switch (aid) { case AID_INET6: while (mplen > 0) { @@ -2165,6 +2222,7 @@ rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags) struct ibuf *wbuf; struct attr *a; void *bp; + time_t staletime;; u_int8_t l; bzero(&rib, sizeof(rib)); @@ -2201,6 +2259,9 @@ rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags) rib.flags |= F_PREF_ELIGIBLE; if (asp->flags & F_ATTR_LOOP) rib.flags &= ~F_PREF_ELIGIBLE; + staletime = asp->peer->staletime[p->prefix->aid]; + if (staletime && p->lastchange <= staletime) + rib.flags |= F_PREF_STALE; rib.aspath_len = aspath_length(asp->aspath); if ((wbuf = imsg_create(ibuf_se_ctl, IMSG_CTL_SHOW_RIB, 0, pid, @@ -3163,7 +3224,8 @@ peer_up(u_int32_t id, struct session_up *sup) return; } - if (peer->state != PEER_DOWN && peer->state != PEER_NONE) + if (peer->state != PEER_DOWN && peer->state != PEER_NONE && + peer->state != PEER_UP) fatalx("peer_up: bad state"); peer->remote_bgpid = ntohl(sup->remote_bgpid); peer->short_as = sup->short_as; @@ -3220,6 +3282,50 @@ peer_down(u_int32_t id) free(peer); } +/* + * Flush all routes older than staletime. If staletime is 0 all routes will + * be flushed.
+ */ +void +peer_flush(struct rde_peer *peer, u_int8_t aid) +{ + struct rde_aspath *asp, *nasp; + + /* walk through per peer RIB list and remove all stale prefixes. */ + for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = nasp) { + nasp = LIST_NEXT(asp, peer_l); + path_remove_stale(asp, aid); + } + + /* Deletions are performed in path_remove() */ + rde_send_pftable_commit(); + + /* flushed no need to keep staletime */ + peer->staletime[aid] = 0; +} + +void +peer_stale(u_int32_t id, u_int8_t aid) +{ + struct rde_peer *peer; + time_t now; + + peer = peer_get(id); + if (peer == NULL) { + log_warnx("peer_stale: unknown peer id %d", id); + return; + } + + if (peer->staletime[aid]) + peer_flush(peer, aid); + peer->staletime[aid] = now = time(NULL); + + /* make sure new prefixes start on a higher timestamp */ + do { + sleep(1); + } while (now >= time(NULL)); +} + void peer_dump(u_int32_t id, u_int8_t aid) { @@ -3235,17 +3341,30 @@ peer_dump(u_int32_t id, u_int8_t aid) up_generate_default(rules_l, peer, aid); else rib_dump(&ribs[peer->ribid], rde_up_dump_upcall, peer, aid); - if (peer->capa.restart) + if (peer->capa.grestart.restart) up_generate_marker(peer, aid); } /* End-of-RIB marker, RFC 4724 */ void +peer_recv_eor(struct rde_peer *peer, u_int8_t aid) +{ + peer->prefix_rcvd_eor++; + + /* First notify SE to remove possible race with the timeout. 
*/ + if (imsg_compose(ibuf_se, IMSG_SESSION_RESTARTED, peer->conf.id, + 0, -1, &aid, sizeof(aid)) == -1) + fatal("imsg_compose error"); +} + +void peer_send_eor(struct rde_peer *peer, u_int8_t aid) { u_int16_t afi; u_int8_t safi; + peer->prefix_sent_eor++; + if (aid == AID_INET) { u_char null[4]; diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h index 1d489c055b4..cfbceac38f7 100644 --- a/usr.sbin/bgpd/rde.h +++ b/usr.sbin/bgpd/rde.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.h,v 1.143 2012/08/12 14:24:56 claudio Exp $ */ +/* $OpenBSD: rde.h,v 1.144 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> and @@ -59,10 +59,13 @@ struct rde_peer { struct uplist_attr updates[AID_MAX]; struct uplist_prefix withdraws[AID_MAX]; struct capabilities capa; + time_t staletime[AID_MAX]; u_int64_t prefix_rcvd_update; u_int64_t prefix_rcvd_withdraw; + u_int64_t prefix_rcvd_eor; u_int64_t prefix_sent_update; u_int64_t prefix_sent_withdraw; + u_int64_t prefix_sent_eor; u_int32_t prefix_cnt; /* # of prefixes */ u_int32_t remote_bgpid; /* host byte order! 
*/ u_int32_t up_pcnt; @@ -430,6 +433,7 @@ int path_update(struct rib *, struct rde_peer *, int path_compare(struct rde_aspath *, struct rde_aspath *); struct rde_aspath *path_lookup(struct rde_aspath *, struct rde_peer *); void path_remove(struct rde_aspath *); +void path_remove_stale(struct rde_aspath *, u_int8_t); void path_destroy(struct rde_aspath *); int path_empty(struct rde_aspath *); struct rde_aspath *path_copy(struct rde_aspath *); diff --git a/usr.sbin/bgpd/rde_rib.c b/usr.sbin/bgpd/rde_rib.c index b26ace1ab72..49fd20ccffa 100644 --- a/usr.sbin/bgpd/rde_rib.c +++ b/usr.sbin/bgpd/rde_rib.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_rib.c,v 1.133 2012/07/01 11:55:13 sthen Exp $ */ +/* $OpenBSD: rde_rib.c,v 1.134 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -505,6 +505,36 @@ path_remove(struct rde_aspath *asp) } } +/* remove all stale routes or if staletime is 0 remove all routes for + a specified AID. */ +void +path_remove_stale(struct rde_aspath *asp, u_int8_t aid) +{ + struct prefix *p, *np; + time_t staletime; + + staletime = asp->peer->staletime[aid]; + for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = np) { + np = LIST_NEXT(p, path_l); + if (p->prefix->aid != aid) + continue; + + if (staletime && p->lastchange > staletime) + continue; + + if (asp->pftableid) { + struct bgpd_addr addr; + + pt_getaddr(p->prefix, &addr); + /* Commit is done in peer_flush() */ + rde_send_pftable(p->aspath->pftableid, &addr, + p->prefix->prefixlen, 1); + } + prefix_destroy(p); + } +} + + /* this function is only called by prefix_remove and path_remove */ void path_destroy(struct rde_aspath *asp) diff --git a/usr.sbin/bgpd/session.c b/usr.sbin/bgpd/session.c index 22d863be1ec..878f3eb1611 100644 --- a/usr.sbin/bgpd/session.c +++ b/usr.sbin/bgpd/session.c @@ -1,4 +1,4 @@ -/* $OpenBSD: session.c,v 1.323 2012/07/11 09:43:10 sthen Exp $ */ +/* $OpenBSD: session.c,v 1.324 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright 
(c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> @@ -69,6 +69,7 @@ void session_tcp_established(struct peer *); void session_capa_ann_none(struct peer *); int session_capa_add(struct ibuf *, u_int8_t, u_int8_t); int session_capa_add_mp(struct ibuf *, u_int8_t); +int session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t); struct bgp_msg *session_newmsg(enum msg_type, u_int16_t); int session_sendmsg(struct bgp_msg *, struct peer *); void session_open(struct peer *); @@ -77,6 +78,9 @@ void session_update(u_int32_t, void *, size_t); void session_notification(struct peer *, u_int8_t, u_int8_t, void *, ssize_t); void session_rrefresh(struct peer *, u_int8_t); +int session_graceful_restart(struct peer *); +int session_graceful_is_restarting(struct peer *); +int session_graceful_stop(struct peer *); int session_dispatch_msg(struct pollfd *, struct peer *); int session_process_msg(struct peer *); int parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *); @@ -437,6 +441,10 @@ session_main(int pipe_m2s[2], int pipe_s2r[2], int pipe_m2r[2], p->state == STATE_ESTABLISHED) session_demote(p, -1); break; + case Timer_RestartTimeout: + timer_stop(p, Timer_RestartTimeout); + session_graceful_stop(p); + break; default: fatalx("King Bula lost in time"); } @@ -941,14 +949,24 @@ change_state(struct peer *peer, enum session_state state, free(peer->rbuf); peer->rbuf = NULL; bzero(&peer->capa.peer, sizeof(peer->capa.peer)); - if (peer->state == STATE_ESTABLISHED) - session_down(peer); + if (event != EVNT_STOP) { timer_set(peer, Timer_IdleHold, peer->IdleHoldTime); if (event != EVNT_NONE && peer->IdleHoldTime < MAX_IDLE_HOLD/2) peer->IdleHoldTime *= 2; } + if (peer->state == STATE_ESTABLISHED) { + if (peer->capa.neg.grestart.restart == 2 && + (event == EVNT_CON_CLOSED || + event == EVNT_CON_FATAL)) { + /* don't punish graceful restart */ + timer_set(peer, Timer_IdleHold, 0); + peer->IdleHoldTime /= 2; + session_graceful_restart(peer); + } else + session_down(peer); + 
} if (peer->state == STATE_NONE || peer->state == STATE_ESTABLISHED) { /* initialize capability negotiation structures */ @@ -959,6 +977,20 @@ change_state(struct peer *peer, enum session_state state, } break; case STATE_CONNECT: + if (peer->state == STATE_ESTABLISHED && + peer->capa.neg.grestart.restart == 2) { + /* do the graceful restart dance */ + session_graceful_restart(peer); + peer->holdtime = INTERVAL_HOLD_INITIAL; + timer_stop(peer, Timer_ConnectRetry); + timer_stop(peer, Timer_Keepalive); + timer_stop(peer, Timer_Hold); + timer_stop(peer, Timer_IdleHold); + timer_stop(peer, Timer_IdleHoldReset); + session_close_connection(peer); + msgbuf_clear(&peer->wbuf); + bzero(&peer->capa.peer, sizeof(peer->capa.peer)); + } break; case STATE_ACTIVE: break; @@ -1032,6 +1064,7 @@ session_accept(int listenfd) } } +open: if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { log_peer_warnx(&p->conf, "ipsec or md5sig configured but not available"); @@ -1064,6 +1097,13 @@ session_accept(int listenfd) } session_socket_blockmode(connfd, BM_NONBLOCK); bgp_fsm(p, EVNT_CON_OPEN); + return; + } else if (p != NULL && p->state == STATE_ESTABLISHED && + p->capa.neg.grestart.restart == 2) { + /* first do the graceful restart dance */ + change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); + /* then do part of the open dance */ + goto open; } else { log_conn_attempt(p, (struct sockaddr *)&cliaddr); close(connfd); @@ -1290,6 +1330,30 @@ session_capa_add_mp(struct ibuf *buf, u_int8_t aid) return (errs); } +int +session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid) +{ + u_int errs = 0; + u_int16_t afi; + u_int8_t flags, safi; + + if (aid2afi(aid, &afi, &safi)) { + log_warn("session_capa_add_gr: bad AID"); + return (1); + } + if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING) + flags = CAPA_GR_F_FLAG; + else + flags = 0; + + afi = htons(afi); + errs += ibuf_add(b, &afi, sizeof(afi)); + errs += ibuf_add(b, &safi, sizeof(safi)); + errs += ibuf_add(b, &flags, 
sizeof(flags)); + + return (errs); +} + struct bgp_msg * session_newmsg(enum msg_type msgtype, u_int16_t len) { @@ -1350,6 +1414,7 @@ session_open(struct peer *p) u_int16_t len; u_int8_t i, op_type, optparamlen = 0; int errs = 0; + int mpcapa = 0; if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) - @@ -1363,20 +1428,51 @@ session_open(struct peer *p) if (p->capa.ann.mp[i]) { /* 4 bytes data */ errs += session_capa_add(opb, CAPA_MP, 4); errs += session_capa_add_mp(opb, i); + mpcapa++; } /* route refresh, RFC 2918 */ if (p->capa.ann.refresh) /* no data */ errs += session_capa_add(opb, CAPA_REFRESH, 0); - /* End-of-RIB marker, RFC 4724 */ - if (p->capa.ann.restart) { /* 2 bytes data */ - u_char c[2]; + /* graceful restart and End-of-RIB marker, RFC 4724 */ + if (p->capa.ann.grestart.restart) { + int rst = 0; + u_int16_t hdr; + u_int8_t grlen; + + if (mpcapa) { + grlen = 2 + 4 * mpcapa; + for (i = 0; i < AID_MAX; i++) { + if (p->capa.neg.grestart.flags[i] & + CAPA_GR_RESTARTING) + rst++; + } + } else { /* AID_INET */ + grlen = 2 + 4; + if (p->capa.neg.grestart.flags[AID_INET] & + CAPA_GR_RESTARTING) + rst++; + } + + hdr = conf->holdtime; /* default timeout */ + /* if client does graceful restart don't set R flag */ + if (!rst) + hdr |= CAPA_GR_R_FLAG; + hdr = htons(hdr); + + errs += session_capa_add(opb, CAPA_RESTART, grlen); + errs += ibuf_add(opb, &hdr, sizeof(hdr)); - c[0] = 0x80; /* we're always restarting */ - c[1] = 0; - errs += session_capa_add(opb, CAPA_RESTART, 2); - errs += ibuf_add(opb, &c, 2); + if (mpcapa) { + for (i = 0; i < AID_MAX; i++) { + if (p->capa.ann.mp[i]) { + errs += session_capa_add_gr(p, opb, i); + } + } + } else { /* AID_INET */ + errs += session_capa_add_gr(p, opb, AID_INET); + } } /* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */ @@ -1583,6 +1679,69 @@ session_rrefresh(struct peer *p, u_int8_t aid) } int +session_graceful_restart(struct peer *p) +{ + u_int8_t i; + + timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout); + 
+ + for (i = 0; i < AID_MAX; i++) { + if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { + if (imsg_compose(ibuf_rde, IMSG_SESSION_STALE, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + log_peer_warnx(&p->conf, + "graceful restart of %s, keeping routes", + aid2str(i)); + p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; + } else if (p->capa.neg.mp[i]) { + if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + log_peer_warnx(&p->conf, + "graceful restart of %s, flushing routes", + aid2str(i)); + } + } + return (0); +} + +int +session_graceful_is_restarting(struct peer *p) +{ + u_int8_t i; + + for (i = 0; i < AID_MAX; i++) + if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) + return (1); + return (0); +} + +int +session_graceful_stop(struct peer *p) +{ + u_int8_t i; + + for (i = 0; i < AID_MAX; i++) { + /* + * Only flush if the peer is restarting and the peer indicated + * it holds the forwarding state. In all other cases the + * session was already flushed when the session came up.
+ */ + if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING && + p->capa.neg.grestart.flags[i] & CAPA_GR_FORWARD) { + log_peer_warnx(&p->conf, "graceful restart of %s, " + "time-out, flushing", aid2str(i)); + if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + } + p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; + } + return (0); +} + +int session_dispatch_msg(struct pollfd *pfd, struct peer *p) { ssize_t n; @@ -2156,7 +2315,7 @@ parse_notification(struct peer *peer) "disabling route refresh capability"); break; case CAPA_RESTART: - peer->capa.ann.restart = 0; + peer->capa.ann.grestart.restart = 0; log_peer_warnx(&peer->conf, "disabling restart capability"); break; @@ -2194,10 +2353,13 @@ parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as) u_int32_t remote_as; u_int16_t len; u_int16_t afi; + u_int16_t gr_header; u_int8_t safi; u_int8_t aid; + u_int8_t gr_flags; u_int8_t capa_code; u_int8_t capa_len; + u_int8_t i; len = dlen; while (len > 0) { @@ -2249,8 +2411,50 @@ parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as) peer->capa.peer.refresh = 1; break; case CAPA_RESTART: - peer->capa.peer.restart = 1; - /* we don't care about the further restart capas yet */ + if (capa_len == 2) { + /* peer only supports EoR marker */ + peer->capa.peer.grestart.restart = 1; + peer->capa.peer.grestart.timeout = 0; + break; + } else if (capa_len % 4 != 2) { + log_peer_warnx(&peer->conf, + "parse_capabilities: " + "expect len 2 + x*4, len is %u", capa_len); + return (-1); + } + + memcpy(&gr_header, capa_val, sizeof(gr_header)); + gr_header = ntohs(gr_header); + peer->capa.peer.grestart.timeout = + gr_header & CAPA_GR_TIMEMASK; + if (peer->capa.peer.grestart.timeout == 0) { + log_peer_warnx(&peer->conf, + "graceful restart timeout is zero"); + return (-1); + } + + for (i = 2; i <= capa_len - 4; i += 4) { + memcpy(&afi, capa_val + i, sizeof(afi)); + afi = ntohs(afi); + 
memcpy(&safi, capa_val + i + 2, sizeof(safi)); + if (afi2aid(afi, safi, &aid) == -1) { + log_peer_warnx(&peer->conf, + "parse_capabilities: restart: AFI " + "%u, safi %u unknown", afi, safi); + return (-1); + } + memcpy(&gr_flags, capa_val + i + 3, + sizeof(gr_flags)); + peer->capa.peer.grestart.flags[aid] |= + CAPA_GR_PRESENT; + if (gr_flags & CAPA_GR_F_FLAG) + peer->capa.peer.grestart.flags[aid] |= + CAPA_GR_FORWARD; + if (gr_header & CAPA_GR_R_FLAG) + peer->capa.peer.grestart.flags[aid] |= + CAPA_GR_RESTART; + peer->capa.peer.grestart.restart = 2; + } break; case CAPA_AS4BYTE: if (capa_len != 4) { @@ -2293,11 +2497,40 @@ capa_neg_calc(struct peer *p) } else p->capa.neg.mp[i] = 0; } - /* if no MP capability present for default IPv4 unicast mode */ + /* if no MP capability present default to IPv4 unicast mode */ if (!hasmp) p->capa.neg.mp[AID_INET] = 1; - p->capa.neg.restart = p->capa.peer.restart; + /* + * graceful restart: only the peer capabilities are of interest here. + * It is necessary to compare the new values with the previous ones + * and act accordingly. AFI/SAFI that are not part of the MP capability + * are treated as not being present.
+ */ + + for (i = 0; i < AID_MAX; i++) { + /* disable GR if the AFI/SAFI is not present */ + if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && + p->capa.neg.mp[i] == 0) + p->capa.peer.grestart.flags[i] = 0; /* disable */ + /* look at current GR state and decide what to do */ + if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { + if (!(p->capa.peer.grestart.flags[i] & + CAPA_GR_FORWARD)) { + if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + log_peer_warnx(&p->conf, "graceful restart of " + "%s, not restarted, flushing", aid2str(i)); + } + p->capa.neg.grestart.flags[i] = + p->capa.peer.grestart.flags[i] | CAPA_GR_RESTARTING; + } else + p->capa.neg.grestart.flags[i] = + p->capa.peer.grestart.flags[i]; + } + p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; + p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; return (0); } @@ -2315,7 +2548,7 @@ session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) u_char *data; enum reconf_action reconf; int n, depend_ok, restricted; - u_int8_t errcode, subcode; + u_int8_t aid, errcode, subcode; if ((n = imsg_read(ibuf)) == -1) fatal("session_dispatch_imsg: imsg_read error"); @@ -2626,6 +2859,40 @@ session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) break; } break; + case IMSG_SESSION_RESTARTED: + if (idx != PFD_PIPE_ROUTE) + fatalx("update request not from RDE"); + if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) { + log_warnx("RDE sent invalid restart msg"); + break; + } + if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) { + log_warnx("no such peer: id=%u", + imsg.hdr.peerid); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_RESTARTED: bad AID"); + if (p->capa.neg.grestart.flags[aid] & + CAPA_GR_RESTARTING && + p->capa.neg.grestart.flags[aid] & + CAPA_GR_FORWARD) { + log_peer_warnx(&p->conf, + "graceful restart of %s finished", + aid2str(aid)); + 
p->capa.neg.grestart.flags[aid] &= + ~CAPA_GR_RESTARTING; + timer_stop(p, Timer_RestartTimeout); + + /* signal back to RDE to cleanup stale routes */ + if (imsg_compose(ibuf_rde, + IMSG_SESSION_RESTARTED, imsg.hdr.peerid, 0, + -1, &aid, sizeof(aid)) == -1) + fatal("imsg_compose: " + "IMSG_SESSION_RESTARTED"); + } + break; default: break; } @@ -2816,9 +3083,10 @@ session_up(struct peer *p) { struct session_up sup; - if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1, - &p->conf, sizeof(p->conf)) == -1) - fatalx("imsg_compose error"); + if (!session_graceful_is_restarting(p)) + if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1, + &p->conf, sizeof(p->conf)) == -1) + fatalx("imsg_compose error"); sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr); sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr); diff --git a/usr.sbin/bgpd/session.h b/usr.sbin/bgpd/session.h index 5b300aa6148..fcd63884458 100644 --- a/usr.sbin/bgpd/session.h +++ b/usr.sbin/bgpd/session.h @@ -1,4 +1,4 @@ -/* $OpenBSD: session.h,v 1.113 2012/04/12 17:26:09 claudio Exp $ */ +/* $OpenBSD: session.h,v 1.114 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -162,8 +162,10 @@ struct peer_stats { u_int64_t msg_sent_rrefresh; u_int64_t prefix_rcvd_update; u_int64_t prefix_rcvd_withdraw; + u_int64_t prefix_rcvd_eor; u_int64_t prefix_sent_update; u_int64_t prefix_sent_withdraw; + u_int64_t prefix_sent_eor; time_t last_updown; time_t last_read; u_int32_t prefix_cnt; @@ -179,6 +181,7 @@ enum Timer { Timer_IdleHold, Timer_IdleHoldReset, Timer_CarpUndemote, + Timer_RestartTimeout, Timer_Max }; |