diff options
author | Claudio Jeker <claudio@cvs.openbsd.org> | 2012-09-12 05:56:23 +0000 |
---|---|---|
committer | Claudio Jeker <claudio@cvs.openbsd.org> | 2012-09-12 05:56:23 +0000 |
commit | a6a48492a863645e13be3a2dbad123121edb0d34 (patch) | |
tree | 45d2ad0dce6a0473018ab537a16cf6f5fbeb9ad1 /usr.sbin/bgpd | |
parent | e4676b6e97d8af9e8386a416ef2bd813e15fcb1f (diff) |
Better graceful restart support (implementing more then just the EoR record).
This implements only the "Restarting Client" bits of the RFC -- in other
words bgpd will keep the FIB when the client restarts but it will not do GR
when restarting itself. The capability is still off by default (you need
"announce restart yes" to enable it).
Tested by Anders Berggren. OK sthen@
Diffstat (limited to 'usr.sbin/bgpd')
-rw-r--r-- | usr.sbin/bgpd/bgpd.h | 28 | ||||
-rw-r--r-- | usr.sbin/bgpd/parse.y | 6 | ||||
-rw-r--r-- | usr.sbin/bgpd/printconf.c | 4 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde.c | 125 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde.h | 6 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde_rib.c | 32 | ||||
-rw-r--r-- | usr.sbin/bgpd/session.c | 306 | ||||
-rw-r--r-- | usr.sbin/bgpd/session.h | 5 |
8 files changed, 477 insertions, 35 deletions
diff --git a/usr.sbin/bgpd/bgpd.h b/usr.sbin/bgpd/bgpd.h index 68b599ade5c..85736efe262 100644 --- a/usr.sbin/bgpd/bgpd.h +++ b/usr.sbin/bgpd/bgpd.h @@ -1,4 +1,4 @@ -/* $OpenBSD: bgpd.h,v 1.270 2012/05/27 18:52:07 claudio Exp $ */ +/* $OpenBSD: bgpd.h,v 1.271 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -132,6 +132,7 @@ extern const struct aid aid_vals[]; #define AID_INET6 2 #define AID_VPN_IPv4 3 #define AID_MAX 4 +#define AID_MIN 1 /* skip AID_UNSPEC since that is a dummy */ #define AID_VALS { \ /* afi, af, safii, name */ \ @@ -253,12 +254,25 @@ struct peer_auth { }; struct capabilities { - int8_t mp[AID_MAX]; /* multiprotocol extensions, RFC 4760 */ - int8_t refresh; /* route refresh, RFC 2918 */ - int8_t restart; /* graceful restart, RFC 4724 */ - int8_t as4byte; /* draft-ietf-idr-as4bytes-13 */ + struct { + int16_t timeout; /* graceful restart timeout */ + int8_t flags[AID_MAX]; /* graceful restart per AID flags */ + int8_t restart; /* graceful restart, RFC 4724 */ + } grestart; + int8_t mp[AID_MAX]; /* multiprotocol extensions, RFC 4760 */ + int8_t refresh; /* route refresh, RFC 2918 */ + int8_t as4byte; /* 4-byte ASnum, RFC 4893 */ }; +#define CAPA_GR_PRESENT 0x01 +#define CAPA_GR_RESTART 0x02 +#define CAPA_GR_FORWARD 0x04 +#define CAPA_GR_RESTARTING 0x08 + +#define CAPA_GR_TIMEMASK 0x0fff +#define CAPA_GR_R_FLAG 0x8000 +#define CAPA_GR_F_FLAG 0x80 + struct peer_config { struct bgpd_addr remote_addr; struct bgpd_addr local_addr; @@ -373,6 +387,9 @@ enum imsg_type { IMSG_SESSION_ADD, IMSG_SESSION_UP, IMSG_SESSION_DOWN, + IMSG_SESSION_STALE, + IMSG_SESSION_FLUSH, + IMSG_SESSION_RESTARTED, IMSG_MRT_OPEN, IMSG_MRT_REOPEN, IMSG_MRT_CLOSE, @@ -550,6 +567,7 @@ struct ctl_neighbor { #define F_PREF_ACTIVE 0x02 #define F_PREF_INTERNAL 0x04 #define F_PREF_ANNOUNCE 0x08 +#define F_PREF_STALE 0x10 struct ctl_show_rib { struct bgpd_addr true_nexthop; diff --git a/usr.sbin/bgpd/parse.y b/usr.sbin/bgpd/parse.y index c712abe001f..108e1ff8c18 100644 --- a/usr.sbin/bgpd/parse.y +++ b/usr.sbin/bgpd/parse.y @@ -1,4 +1,4 @@ -/* $OpenBSD: parse.y,v 1.262 2012/07/13 15:25:37 claudio Exp $ */ +/* $OpenBSD: parse.y,v 1.263 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright (c) 2002, 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -1038,7 +1038,7 @@ peeropts : REMOTEAS as4number { curpeer->conf.capabilities.refresh = $3; } | ANNOUNCE RESTART yesno { - curpeer->conf.capabilities.restart = $3; + curpeer->conf.capabilities.grestart.restart = $3; } | ANNOUNCE AS4BYTE yesno { curpeer->conf.capabilities.as4byte = $3; @@ -3019,7 +3019,7 @@ alloc_peer(void) for (i = 0; i < AID_MAX; i++) p->conf.capabilities.mp[i] = -1; p->conf.capabilities.refresh = 1; - p->conf.capabilities.restart = 0; + p->conf.capabilities.grestart.restart = 0; p->conf.capabilities.as4byte = 1; p->conf.local_as = conf->as; p->conf.local_short_as = conf->short_as; diff --git a/usr.sbin/bgpd/printconf.c b/usr.sbin/bgpd/printconf.c index 6599761c638..88ffb052197 100644 --- a/usr.sbin/bgpd/printconf.c +++ b/usr.sbin/bgpd/printconf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: printconf.c,v 1.86 2011/09/17 16:29:44 claudio Exp $ */ +/* $OpenBSD: printconf.c,v 1.87 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -377,7 +377,7 @@ print_peer(struct peer_config *p, struct bgpd_config *conf, const char *c) printf("%s\tannounce capabilities no\n", c); if (p->capabilities.refresh == 0) printf("%s\tannounce refresh no\n", c); - if (p->capabilities.restart == 1) + if (p->capabilities.grestart.restart == 1) printf("%s\tannounce restart yes\n", c); if (p->capabilities.as4byte == 0) printf("%s\tannounce as4byte no\n", c); diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c index 8459ac67739..cd4456c4a8a 100644 --- a/usr.sbin/bgpd/rde.c +++ b/usr.sbin/bgpd/rde.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.c,v 1.317 2012/08/12 14:24:56 claudio Exp $ */ +/* $OpenBSD: rde.c,v 1.318 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -101,6 +101,9 @@ struct rde_peer *peer_add(u_int32_t, struct peer_config *); struct rde_peer *peer_get(u_int32_t); void peer_up(u_int32_t, struct session_up *); void peer_down(u_int32_t); +void peer_flush(struct rde_peer *, u_int8_t); +void peer_stale(u_int32_t, u_int8_t); +void peer_recv_eor(struct rde_peer *, u_int8_t); void peer_dump(u_int32_t, u_int8_t); void peer_send_eor(struct rde_peer *, u_int8_t); @@ -407,6 +410,47 @@ rde_dispatch_imsg_session(struct imsgbuf *ibuf) case IMSG_SESSION_DOWN: peer_down(imsg.hdr.peerid); break; + case IMSG_SESSION_STALE: + if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { + log_warnx("rde_dispatch: wrong imsg len"); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_STALE: bad AID"); + peer_stale(imsg.hdr.peerid, aid); + break; + case IMSG_SESSION_FLUSH: + if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { + log_warnx("rde_dispatch: wrong imsg len"); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_FLUSH: bad AID"); + if ((peer = peer_get(imsg.hdr.peerid)) == NULL) { + log_warnx("rde_dispatch: unknown peer id %d", + imsg.hdr.peerid); + break; + } + peer_flush(peer, aid); + break; + case IMSG_SESSION_RESTARTED: + if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { + log_warnx("rde_dispatch: wrong imsg len"); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_RESTARTED: bad AID"); + if ((peer = peer_get(imsg.hdr.peerid)) == NULL) { + log_warnx("rde_dispatch: unknown peer id %d", + imsg.hdr.peerid); + break; + } + if (peer->staletime[aid]) + peer_flush(peer, aid); + break; case IMSG_REFRESH: if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { log_warnx("rde_dispatch: wrong imsg len"); @@ -559,10 +603,14 @@ badnet: peer->prefix_rcvd_update; p.stats.prefix_rcvd_withdraw = peer->prefix_rcvd_withdraw; + p.stats.prefix_rcvd_eor = + peer->prefix_rcvd_eor; p.stats.prefix_sent_update = peer->prefix_sent_update; p.stats.prefix_sent_withdraw = peer->prefix_sent_withdraw; + p.stats.prefix_sent_eor = + peer->prefix_sent_eor; } imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0, imsg.hdr.pid, -1, &p, sizeof(struct peer)); @@ -1020,6 +1068,10 @@ rde_update_dispatch(struct imsg *imsg) ERR_UPD_ATTRLIST, NULL, 0); return (-1); } + if (withdrawn_len == 0) { + /* EoR marker */ + peer_recv_eor(peer, AID_INET); + } return (0); } @@ -1050,6 +1102,11 @@ rde_update_dispatch(struct imsg *imsg) goto done; } + if ((asp->flags & ~F_ATTR_MP_UNREACH) == 0 && mplen == 0) { + /* EoR marker */ + peer_recv_eor(peer, aid); + } + switch (aid) { case AID_INET6: while (mplen > 0) { @@ -2165,6 +2222,7 @@ rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags) struct ibuf *wbuf; struct attr *a; void *bp; + time_t staletime;; u_int8_t l; bzero(&rib, sizeof(rib)); @@ -2201,6 +2259,9 @@ rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags) rib.flags |= F_PREF_ELIGIBLE; if (asp->flags & F_ATTR_LOOP) rib.flags &= ~F_PREF_ELIGIBLE; + staletime = asp->peer->staletime[p->prefix->aid]; + if (staletime && p->lastchange <= staletime) + rib.flags |= F_PREF_STALE; rib.aspath_len = aspath_length(asp->aspath); if ((wbuf = imsg_create(ibuf_se_ctl, IMSG_CTL_SHOW_RIB, 0, pid, @@ -3163,7 +3224,8 @@ peer_up(u_int32_t id, struct session_up *sup) return; } - if (peer->state != PEER_DOWN && peer->state != PEER_NONE) + if (peer->state != PEER_DOWN && peer->state != PEER_NONE && + peer->state != PEER_UP) fatalx("peer_up: bad state"); peer->remote_bgpid = ntohl(sup->remote_bgpid); peer->short_as = sup->short_as; @@ -3220,6 +3282,50 @@ peer_down(u_int32_t id) free(peer); } +/* + * Flush all routes older then staletime. If staletime is 0 all routes will + * be flushed. + */ +void +peer_flush(struct rde_peer *peer, u_int8_t aid) +{ + struct rde_aspath *asp, *nasp; + + /* walk through per peer RIB list and remove all stale prefixes. */ + for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = nasp) { + nasp = LIST_NEXT(asp, peer_l); + path_remove_stale(asp, aid); + } + + /* Deletions are performed in path_remove() */ + rde_send_pftable_commit(); + + /* flushed no need to keep staletime */ + peer->staletime[aid] = 0; +} + +void +peer_stale(u_int32_t id, u_int8_t aid) +{ + struct rde_peer *peer; + time_t now; + + peer = peer_get(id); + if (peer == NULL) { + log_warnx("peer_stale: unknown peer id %d", id); + return; + } + + if (peer->staletime[aid]) + peer_flush(peer, aid); + peer->staletime[aid] = now = time(NULL); + + /* make sure new prefixes start on a higher timestamp */ + do { + sleep(1); + } while (now >= time(NULL)); +} + void peer_dump(u_int32_t id, u_int8_t aid) { @@ -3235,17 +3341,30 @@ peer_dump(u_int32_t id, u_int8_t aid) up_generate_default(rules_l, peer, aid); else rib_dump(&ribs[peer->ribid], rde_up_dump_upcall, peer, aid); - if (peer->capa.restart) + if (peer->capa.grestart.restart) up_generate_marker(peer, aid); } /* End-of-RIB marker, RFC 4724 */ void +peer_recv_eor(struct rde_peer *peer, u_int8_t aid) +{ + peer->prefix_rcvd_eor++; + + /* First notify SE to remove possible race with the timeout. */ + if (imsg_compose(ibuf_se, IMSG_SESSION_RESTARTED, peer->conf.id, + 0, -1, &aid, sizeof(aid)) == -1) + fatal("imsg_compose error"); +} + +void peer_send_eor(struct rde_peer *peer, u_int8_t aid) { u_int16_t afi; u_int8_t safi; + peer->prefix_sent_eor++; + if (aid == AID_INET) { u_char null[4]; diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h index 1d489c055b4..cfbceac38f7 100644 --- a/usr.sbin/bgpd/rde.h +++ b/usr.sbin/bgpd/rde.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.h,v 1.143 2012/08/12 14:24:56 claudio Exp $ */ +/* $OpenBSD: rde.h,v 1.144 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> and @@ -59,10 +59,13 @@ struct rde_peer { struct uplist_attr updates[AID_MAX]; struct uplist_prefix withdraws[AID_MAX]; struct capabilities capa; + time_t staletime[AID_MAX]; u_int64_t prefix_rcvd_update; u_int64_t prefix_rcvd_withdraw; + u_int64_t prefix_rcvd_eor; u_int64_t prefix_sent_update; u_int64_t prefix_sent_withdraw; + u_int64_t prefix_sent_eor; u_int32_t prefix_cnt; /* # of prefixes */ u_int32_t remote_bgpid; /* host byte order! */ u_int32_t up_pcnt; @@ -430,6 +433,7 @@ int path_update(struct rib *, struct rde_peer *, int path_compare(struct rde_aspath *, struct rde_aspath *); struct rde_aspath *path_lookup(struct rde_aspath *, struct rde_peer *); void path_remove(struct rde_aspath *); +void path_remove_stale(struct rde_aspath *, u_int8_t); void path_destroy(struct rde_aspath *); int path_empty(struct rde_aspath *); struct rde_aspath *path_copy(struct rde_aspath *); diff --git a/usr.sbin/bgpd/rde_rib.c b/usr.sbin/bgpd/rde_rib.c index b26ace1ab72..49fd20ccffa 100644 --- a/usr.sbin/bgpd/rde_rib.c +++ b/usr.sbin/bgpd/rde_rib.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_rib.c,v 1.133 2012/07/01 11:55:13 sthen Exp $ */ +/* $OpenBSD: rde_rib.c,v 1.134 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -505,6 +505,36 @@ path_remove(struct rde_aspath *asp) } } +/* remove all stale routes or if staletime is 0 remove all routes for + a specified AID. */ +void +path_remove_stale(struct rde_aspath *asp, u_int8_t aid) +{ + struct prefix *p, *np; + time_t staletime; + + staletime = asp->peer->staletime[aid]; + for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = np) { + np = LIST_NEXT(p, path_l); + if (p->prefix->aid != aid) + continue; + + if (staletime && p->lastchange > staletime) + continue; + + if (asp->pftableid) { + struct bgpd_addr addr; + + pt_getaddr(p->prefix, &addr); + /* Commit is done in peer_flush() */ + rde_send_pftable(p->aspath->pftableid, &addr, + p->prefix->prefixlen, 1); + } + prefix_destroy(p); + } +} + + /* this function is only called by prefix_remove and path_remove */ void path_destroy(struct rde_aspath *asp) diff --git a/usr.sbin/bgpd/session.c b/usr.sbin/bgpd/session.c index 22d863be1ec..878f3eb1611 100644 --- a/usr.sbin/bgpd/session.c +++ b/usr.sbin/bgpd/session.c @@ -1,4 +1,4 @@ -/* $OpenBSD: session.c,v 1.323 2012/07/11 09:43:10 sthen Exp $ */ +/* $OpenBSD: session.c,v 1.324 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> @@ -69,6 +69,7 @@ void session_tcp_established(struct peer *); void session_capa_ann_none(struct peer *); int session_capa_add(struct ibuf *, u_int8_t, u_int8_t); int session_capa_add_mp(struct ibuf *, u_int8_t); +int session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t); struct bgp_msg *session_newmsg(enum msg_type, u_int16_t); int session_sendmsg(struct bgp_msg *, struct peer *); void session_open(struct peer *); @@ -77,6 +78,9 @@ void session_update(u_int32_t, void *, size_t); void session_notification(struct peer *, u_int8_t, u_int8_t, void *, ssize_t); void session_rrefresh(struct peer *, u_int8_t); +int session_graceful_restart(struct peer *); +int session_graceful_is_restarting(struct peer *); +int session_graceful_stop(struct peer *); int session_dispatch_msg(struct pollfd *, struct peer *); int session_process_msg(struct peer *); int parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *); @@ -437,6 +441,10 @@ session_main(int pipe_m2s[2], int pipe_s2r[2], int pipe_m2r[2], p->state == STATE_ESTABLISHED) session_demote(p, -1); break; + case Timer_RestartTimeout: + timer_stop(p, Timer_RestartTimeout); + session_graceful_stop(p); + break; default: fatalx("King Bula lost in time"); } @@ -941,14 +949,24 @@ change_state(struct peer *peer, enum session_state state, free(peer->rbuf); peer->rbuf = NULL; bzero(&peer->capa.peer, sizeof(peer->capa.peer)); - if (peer->state == STATE_ESTABLISHED) - session_down(peer); + if (event != EVNT_STOP) { timer_set(peer, Timer_IdleHold, peer->IdleHoldTime); if (event != EVNT_NONE && peer->IdleHoldTime < MAX_IDLE_HOLD/2) peer->IdleHoldTime *= 2; } + if (peer->state == STATE_ESTABLISHED) { + if (peer->capa.neg.grestart.restart == 2 && + (event == EVNT_CON_CLOSED || + event == EVNT_CON_FATAL)) { + /* don't punish graceful restart */ + timer_set(peer, Timer_IdleHold, 0); + peer->IdleHoldTime /= 2; + session_graceful_restart(peer); + } else + session_down(peer); + } if (peer->state == STATE_NONE || peer->state == STATE_ESTABLISHED) { /* initialize capability negotiation structures */ @@ -959,6 +977,20 @@ change_state(struct peer *peer, enum session_state state, } break; case STATE_CONNECT: + if (peer->state == STATE_ESTABLISHED && + peer->capa.neg.grestart.restart == 2) { + /* do the graceful restart dance */ + session_graceful_restart(peer); + peer->holdtime = INTERVAL_HOLD_INITIAL; + timer_stop(peer, Timer_ConnectRetry); + timer_stop(peer, Timer_Keepalive); + timer_stop(peer, Timer_Hold); + timer_stop(peer, Timer_IdleHold); + timer_stop(peer, Timer_IdleHoldReset); + session_close_connection(peer); + msgbuf_clear(&peer->wbuf); + bzero(&peer->capa.peer, sizeof(peer->capa.peer)); + } break; case STATE_ACTIVE: break; @@ -1032,6 +1064,7 @@ session_accept(int listenfd) } } +open: if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { log_peer_warnx(&p->conf, "ipsec or md5sig configured but not available"); @@ -1064,6 +1097,13 @@ session_accept(int listenfd) } session_socket_blockmode(connfd, BM_NONBLOCK); bgp_fsm(p, EVNT_CON_OPEN); + return; + } else if (p != NULL && p->state == STATE_ESTABLISHED && + p->capa.neg.grestart.restart == 2) { + /* first do the graceful restart dance */ + change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); + /* then do part of the open dance */ + goto open; } else { log_conn_attempt(p, (struct sockaddr *)&cliaddr); close(connfd); @@ -1290,6 +1330,30 @@ session_capa_add_mp(struct ibuf *buf, u_int8_t aid) return (errs); } +int +session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid) +{ + u_int errs = 0; + u_int16_t afi; + u_int8_t flags, safi; + + if (aid2afi(aid, &afi, &safi)) { + log_warn("session_capa_add_gr: bad AID"); + return (1); + } + if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING) + flags = CAPA_GR_F_FLAG; + else + flags = 0; + + afi = htons(afi); + errs += ibuf_add(b, &afi, sizeof(afi)); + errs += ibuf_add(b, &safi, sizeof(safi)); + errs += ibuf_add(b, &flags, sizeof(flags)); + + return (errs); +} + struct bgp_msg * session_newmsg(enum msg_type msgtype, u_int16_t len) { @@ -1350,6 +1414,7 @@ session_open(struct peer *p) u_int16_t len; u_int8_t i, op_type, optparamlen = 0; int errs = 0; + int mpcapa = 0; if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) - @@ -1363,20 +1428,51 @@ session_open(struct peer *p) if (p->capa.ann.mp[i]) { /* 4 bytes data */ errs += session_capa_add(opb, CAPA_MP, 4); errs += session_capa_add_mp(opb, i); + mpcapa++; } /* route refresh, RFC 2918 */ if (p->capa.ann.refresh) /* no data */ errs += session_capa_add(opb, CAPA_REFRESH, 0); - /* End-of-RIB marker, RFC 4724 */ - if (p->capa.ann.restart) { /* 2 bytes data */ - u_char c[2]; + /* graceful restart and End-of-RIB marker, RFC 4724 */ + if (p->capa.ann.grestart.restart) { + int rst = 0; + u_int16_t hdr; + u_int8_t grlen; + + if (mpcapa) { + grlen = 2 + 4 * mpcapa; + for (i = 0; i < AID_MAX; i++) { + if (p->capa.neg.grestart.flags[i] & + CAPA_GR_RESTARTING) + rst++; + } + } else { /* AID_INET */ + grlen = 2 + 4; + if (p->capa.neg.grestart.flags[AID_INET] & + CAPA_GR_RESTARTING) + rst++; + } + + hdr = conf->holdtime; /* default timeout */ + /* if client does graceful restart don't set R flag */ + if (!rst) + hdr |= CAPA_GR_R_FLAG; + hdr = htons(hdr); + + errs += session_capa_add(opb, CAPA_RESTART, grlen); + errs += ibuf_add(opb, &hdr, sizeof(hdr)); - c[0] = 0x80; /* we're always restarting */ - c[1] = 0; - errs += session_capa_add(opb, CAPA_RESTART, 2); - errs += ibuf_add(opb, &c, 2); + if (mpcapa) { + for (i = 0; i < AID_MAX; i++) { + if (p->capa.ann.mp[i]) { + errs += session_capa_add_gr(p, opb, i); + } + } + } else { /* AID_INET */ + errs += session_capa_add_gr(p, opb, AID_INET); + } } /* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */ @@ -1583,6 +1679,69 @@ session_rrefresh(struct peer *p, u_int8_t aid) } int +session_graceful_restart(struct peer *p) +{ + u_int8_t i; + + timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout); + + for (i = 0; i < AID_MAX; i++) { + if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { + if (imsg_compose(ibuf_rde, IMSG_SESSION_STALE, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + log_peer_warnx(&p->conf, + "graceful restart of %s, keeping routes", + aid2str(i)); + p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; + } else if (p->capa.neg.mp[i]) { + if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + log_peer_warnx(&p->conf, + "graceful restart of %s, flushing routes", + aid2str(i)); + } + } + return (0); +} + +int +session_graceful_is_restarting(struct peer *p) +{ + u_int8_t i; + + for (i = 0; i < AID_MAX; i++) + if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) + return (1); + return (0); +} + +int +session_graceful_stop(struct peer *p) +{ + u_int8_t i; + + for (i = 0; i < AID_MAX; i++) { + /* + * Only flush if the peer is restarting and the peer indicated + * it hold the forwarding state. In all other cases the + * session was already flushed when the session came up. + */ + if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING && + p->capa.neg.grestart.flags[i] & CAPA_GR_FORWARD) { + log_peer_warnx(&p->conf, "graceful restart of %s, " + "time-out, flushing", aid2str(i)); + if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + } + p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; + } + return (0); +} + +int session_dispatch_msg(struct pollfd *pfd, struct peer *p) { ssize_t n; @@ -2156,7 +2315,7 @@ parse_notification(struct peer *peer) "disabling route refresh capability"); break; case CAPA_RESTART: - peer->capa.ann.restart = 0; + peer->capa.ann.grestart.restart = 0; log_peer_warnx(&peer->conf, "disabling restart capability"); break; @@ -2194,10 +2353,13 @@ parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as) u_int32_t remote_as; u_int16_t len; u_int16_t afi; + u_int16_t gr_header; u_int8_t safi; u_int8_t aid; + u_int8_t gr_flags; u_int8_t capa_code; u_int8_t capa_len; + u_int8_t i; len = dlen; while (len > 0) { @@ -2249,8 +2411,50 @@ parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as) peer->capa.peer.refresh = 1; break; case CAPA_RESTART: - peer->capa.peer.restart = 1; - /* we don't care about the further restart capas yet */ + if (capa_len == 2) { + /* peer only supports EoR marker */ + peer->capa.peer.grestart.restart = 1; + peer->capa.peer.grestart.timeout = 0; + break; + } else if (capa_len % 4 != 2) { + log_peer_warnx(&peer->conf, + "parse_capabilities: " + "expect len 2 + x*4, len is %u", capa_len); + return (-1); + } + + memcpy(&gr_header, capa_val, sizeof(gr_header)); + gr_header = ntohs(gr_header); + peer->capa.peer.grestart.timeout = + gr_header & CAPA_GR_TIMEMASK; + if (peer->capa.peer.grestart.timeout == 0) { + log_peer_warnx(&peer->conf, + "graceful restart timeout is zero"); + return (-1); + } + + for (i = 2; i <= capa_len - 4; i += 4) { + memcpy(&afi, capa_val + i, sizeof(afi)); + afi = ntohs(afi); + memcpy(&safi, capa_val + i + 2, sizeof(safi)); + if (afi2aid(afi, safi, &aid) == -1) { + log_peer_warnx(&peer->conf, + "parse_capabilities: restart: AFI " + "%u, safi %u unknown", afi, safi); + return (-1); + } + memcpy(&gr_flags, capa_val + i + 3, + sizeof(gr_flags)); + peer->capa.peer.grestart.flags[aid] |= + CAPA_GR_PRESENT; + if (gr_flags & CAPA_GR_F_FLAG) + peer->capa.peer.grestart.flags[aid] |= + CAPA_GR_FORWARD; + if (gr_header & CAPA_GR_R_FLAG) + peer->capa.peer.grestart.flags[aid] |= + CAPA_GR_RESTART; + peer->capa.peer.grestart.restart = 2; + } break; case CAPA_AS4BYTE: if (capa_len != 4) { @@ -2293,11 +2497,40 @@ capa_neg_calc(struct peer *p) } else p->capa.neg.mp[i] = 0; } - /* if no MP capability present for default IPv4 unicast mode */ + /* if no MP capability present default to IPv4 unicast mode */ if (!hasmp) p->capa.neg.mp[AID_INET] = 1; - p->capa.neg.restart = p->capa.peer.restart; + /* + * graceful restart: only the peer capabilities are of interest here. + * It is necessary to compare the new values with the previous ones + * and act acordingly. AFI/SAFI that are not part in the MP capability + * are treated as not being present. + */ + + for (i = 0; i < AID_MAX; i++) { + /* disable GR if the AFI/SAFI is not present */ + if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && + p->capa.neg.mp[i] == 0) + p->capa.peer.grestart.flags[i] = 0; /* disable */ + /* look at current GR state and decide what to do */ + if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { + if (!(p->capa.peer.grestart.flags[i] & + CAPA_GR_FORWARD)) { + if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + log_peer_warnx(&p->conf, "graceful restart of " + "%s, not restarted, flushing", aid2str(i)); + } + p->capa.neg.grestart.flags[i] = + p->capa.peer.grestart.flags[i] | CAPA_GR_RESTARTING; + } else + p->capa.neg.grestart.flags[i] = + p->capa.peer.grestart.flags[i]; + } + p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; + p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; return (0); } @@ -2315,7 +2548,7 @@ session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) u_char *data; enum reconf_action reconf; int n, depend_ok, restricted; - u_int8_t errcode, subcode; + u_int8_t aid, errcode, subcode; if ((n = imsg_read(ibuf)) == -1) fatal("session_dispatch_imsg: imsg_read error"); @@ -2626,6 +2859,40 @@ session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) break; } break; + case IMSG_SESSION_RESTARTED: + if (idx != PFD_PIPE_ROUTE) + fatalx("update request not from RDE"); + if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) { + log_warnx("RDE sent invalid restart msg"); + break; + } + if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) { + log_warnx("no such peer: id=%u", + imsg.hdr.peerid); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_RESTARTED: bad AID"); + if (p->capa.neg.grestart.flags[aid] & + CAPA_GR_RESTARTING && + p->capa.neg.grestart.flags[aid] & + CAPA_GR_FORWARD) { + log_peer_warnx(&p->conf, + "graceful restart of %s finished", + aid2str(aid)); + p->capa.neg.grestart.flags[aid] &= + ~CAPA_GR_RESTARTING; + timer_stop(p, Timer_RestartTimeout); + + /* signal back to RDE to cleanup stale routes */ + if (imsg_compose(ibuf_rde, + IMSG_SESSION_RESTARTED, imsg.hdr.peerid, 0, + -1, &aid, sizeof(aid)) == -1) + fatal("imsg_compose: " + "IMSG_SESSION_RESTARTED"); + } + break; default: break; } @@ -2816,9 +3083,10 @@ session_up(struct peer *p) { struct session_up sup; - if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1, - &p->conf, sizeof(p->conf)) == -1) - fatalx("imsg_compose error"); + if (!session_graceful_is_restarting(p)) + if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1, + &p->conf, sizeof(p->conf)) == -1) + fatalx("imsg_compose error"); sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr); sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr); diff --git a/usr.sbin/bgpd/session.h b/usr.sbin/bgpd/session.h index 5b300aa6148..fcd63884458 100644 --- a/usr.sbin/bgpd/session.h +++ b/usr.sbin/bgpd/session.h @@ -1,4 +1,4 @@ -/* $OpenBSD: session.h,v 1.113 2012/04/12 17:26:09 claudio Exp $ */ +/* $OpenBSD: session.h,v 1.114 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -162,8 +162,10 @@ struct peer_stats { u_int64_t msg_sent_rrefresh; u_int64_t prefix_rcvd_update; u_int64_t prefix_rcvd_withdraw; + u_int64_t prefix_rcvd_eor; u_int64_t prefix_sent_update; u_int64_t prefix_sent_withdraw; + u_int64_t prefix_sent_eor; time_t last_updown; time_t last_read; u_int32_t prefix_cnt; @@ -179,6 +181,7 @@ enum Timer { Timer_IdleHold, Timer_IdleHoldReset, Timer_CarpUndemote, + Timer_RestartTimeout, Timer_Max }; |