diff options
Diffstat (limited to 'usr.sbin/bgpd/session.c')
-rw-r--r-- | usr.sbin/bgpd/session.c | 306 |
1 files changed, 287 insertions, 19 deletions
diff --git a/usr.sbin/bgpd/session.c b/usr.sbin/bgpd/session.c index 22d863be1ec..878f3eb1611 100644 --- a/usr.sbin/bgpd/session.c +++ b/usr.sbin/bgpd/session.c @@ -1,4 +1,4 @@ -/* $OpenBSD: session.c,v 1.323 2012/07/11 09:43:10 sthen Exp $ */ +/* $OpenBSD: session.c,v 1.324 2012/09/12 05:56:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> @@ -69,6 +69,7 @@ void session_tcp_established(struct peer *); void session_capa_ann_none(struct peer *); int session_capa_add(struct ibuf *, u_int8_t, u_int8_t); int session_capa_add_mp(struct ibuf *, u_int8_t); +int session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t); struct bgp_msg *session_newmsg(enum msg_type, u_int16_t); int session_sendmsg(struct bgp_msg *, struct peer *); void session_open(struct peer *); @@ -77,6 +78,9 @@ void session_update(u_int32_t, void *, size_t); void session_notification(struct peer *, u_int8_t, u_int8_t, void *, ssize_t); void session_rrefresh(struct peer *, u_int8_t); +int session_graceful_restart(struct peer *); +int session_graceful_is_restarting(struct peer *); +int session_graceful_stop(struct peer *); int session_dispatch_msg(struct pollfd *, struct peer *); int session_process_msg(struct peer *); int parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *); @@ -437,6 +441,10 @@ session_main(int pipe_m2s[2], int pipe_s2r[2], int pipe_m2r[2], p->state == STATE_ESTABLISHED) session_demote(p, -1); break; + case Timer_RestartTimeout: + timer_stop(p, Timer_RestartTimeout); + session_graceful_stop(p); + break; default: fatalx("King Bula lost in time"); } @@ -941,14 +949,24 @@ change_state(struct peer *peer, enum session_state state, free(peer->rbuf); peer->rbuf = NULL; bzero(&peer->capa.peer, sizeof(peer->capa.peer)); - if (peer->state == STATE_ESTABLISHED) - session_down(peer); + if (event != EVNT_STOP) { timer_set(peer, Timer_IdleHold, peer->IdleHoldTime); if (event != EVNT_NONE && peer->IdleHoldTime < MAX_IDLE_HOLD/2) peer->IdleHoldTime *= 2; } + if (peer->state == STATE_ESTABLISHED) { + if (peer->capa.neg.grestart.restart == 2 && + (event == EVNT_CON_CLOSED || + event == EVNT_CON_FATAL)) { + /* don't punish graceful restart */ + timer_set(peer, Timer_IdleHold, 0); + peer->IdleHoldTime /= 2; + session_graceful_restart(peer); + } else + session_down(peer); + } if (peer->state == STATE_NONE || peer->state == STATE_ESTABLISHED) { /* initialize capability negotiation structures */ @@ -959,6 +977,20 @@ change_state(struct peer *peer, enum session_state state, } break; case STATE_CONNECT: + if (peer->state == STATE_ESTABLISHED && + peer->capa.neg.grestart.restart == 2) { + /* do the graceful restart dance */ + session_graceful_restart(peer); + peer->holdtime = INTERVAL_HOLD_INITIAL; + timer_stop(peer, Timer_ConnectRetry); + timer_stop(peer, Timer_Keepalive); + timer_stop(peer, Timer_Hold); + timer_stop(peer, Timer_IdleHold); + timer_stop(peer, Timer_IdleHoldReset); + session_close_connection(peer); + msgbuf_clear(&peer->wbuf); + bzero(&peer->capa.peer, sizeof(peer->capa.peer)); + } break; case STATE_ACTIVE: break; @@ -1032,6 +1064,7 @@ session_accept(int listenfd) } } +open: if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { log_peer_warnx(&p->conf, "ipsec or md5sig configured but not available"); @@ -1064,6 +1097,13 @@ session_accept(int listenfd) } session_socket_blockmode(connfd, BM_NONBLOCK); bgp_fsm(p, EVNT_CON_OPEN); + return; + } else if (p != NULL && p->state == STATE_ESTABLISHED && + p->capa.neg.grestart.restart == 2) { + /* first do the graceful restart dance */ + change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); + /* then do part of the open dance */ + goto open; } else { log_conn_attempt(p, (struct sockaddr *)&cliaddr); close(connfd); @@ -1290,6 +1330,30 @@ session_capa_add_mp(struct ibuf *buf, u_int8_t aid) return (errs); } +int +session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid) +{ + u_int errs = 0; + u_int16_t afi; + u_int8_t flags, safi; + + if (aid2afi(aid, &afi, &safi)) { + log_warn("session_capa_add_gr: bad AID"); + return (1); + } + if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING) + flags = CAPA_GR_F_FLAG; + else + flags = 0; + + afi = htons(afi); + errs += ibuf_add(b, &afi, sizeof(afi)); + errs += ibuf_add(b, &safi, sizeof(safi)); + errs += ibuf_add(b, &flags, sizeof(flags)); + + return (errs); +} + struct bgp_msg * session_newmsg(enum msg_type msgtype, u_int16_t len) { @@ -1350,6 +1414,7 @@ session_open(struct peer *p) u_int16_t len; u_int8_t i, op_type, optparamlen = 0; int errs = 0; + int mpcapa = 0; if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) - @@ -1363,20 +1428,51 @@ session_open(struct peer *p) if (p->capa.ann.mp[i]) { /* 4 bytes data */ errs += session_capa_add(opb, CAPA_MP, 4); errs += session_capa_add_mp(opb, i); + mpcapa++; } /* route refresh, RFC 2918 */ if (p->capa.ann.refresh) /* no data */ errs += session_capa_add(opb, CAPA_REFRESH, 0); - /* End-of-RIB marker, RFC 4724 */ - if (p->capa.ann.restart) { /* 2 bytes data */ - u_char c[2]; + /* graceful restart and End-of-RIB marker, RFC 4724 */ + if (p->capa.ann.grestart.restart) { + int rst = 0; + u_int16_t hdr; + u_int8_t grlen; + + if (mpcapa) { + grlen = 2 + 4 * mpcapa; + for (i = 0; i < AID_MAX; i++) { + if (p->capa.neg.grestart.flags[i] & + CAPA_GR_RESTARTING) + rst++; + } + } else { /* AID_INET */ + grlen = 2 + 4; + if (p->capa.neg.grestart.flags[AID_INET] & + CAPA_GR_RESTARTING) + rst++; + } + + hdr = conf->holdtime; /* default timeout */ + /* if client does graceful restart don't set R flag */ + if (!rst) + hdr |= CAPA_GR_R_FLAG; + hdr = htons(hdr); + + errs += session_capa_add(opb, CAPA_RESTART, grlen); + errs += ibuf_add(opb, &hdr, sizeof(hdr)); - c[0] = 0x80; /* we're always restarting */ - c[1] = 0; - errs += session_capa_add(opb, CAPA_RESTART, 2); - errs += ibuf_add(opb, &c, 2); + if (mpcapa) { + for (i = 0; i < AID_MAX; i++) { + if (p->capa.ann.mp[i]) { + errs += session_capa_add_gr(p, opb, i); + } + } + } else { /* AID_INET */ + errs += session_capa_add_gr(p, opb, AID_INET); + } } /* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */ @@ -1583,6 +1679,69 @@ session_rrefresh(struct peer *p, u_int8_t aid) } int +session_graceful_restart(struct peer *p) +{ + u_int8_t i; + + timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout); + + for (i = 0; i < AID_MAX; i++) { + if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { + if (imsg_compose(ibuf_rde, IMSG_SESSION_STALE, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + log_peer_warnx(&p->conf, + "graceful restart of %s, keeping routes", + aid2str(i)); + p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; + } else if (p->capa.neg.mp[i]) { + if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + log_peer_warnx(&p->conf, + "graceful restart of %s, flushing routes", + aid2str(i)); + } + } + return (0); +} + +int +session_graceful_is_restarting(struct peer *p) +{ + u_int8_t i; + + for (i = 0; i < AID_MAX; i++) + if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) + return (1); + return (0); +} + +int +session_graceful_stop(struct peer *p) +{ + u_int8_t i; + + for (i = 0; i < AID_MAX; i++) { + /* + * Only flush if the peer is restarting and the peer indicated + * it hold the forwarding state. In all other cases the + * session was already flushed when the session came up. + */ + if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING && + p->capa.neg.grestart.flags[i] & CAPA_GR_FORWARD) { + log_peer_warnx(&p->conf, "graceful restart of %s, " + "time-out, flushing", aid2str(i)); + if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + } + p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; + } + return (0); +} + +int session_dispatch_msg(struct pollfd *pfd, struct peer *p) { ssize_t n; @@ -2156,7 +2315,7 @@ parse_notification(struct peer *peer) "disabling route refresh capability"); break; case CAPA_RESTART: - peer->capa.ann.restart = 0; + peer->capa.ann.grestart.restart = 0; log_peer_warnx(&peer->conf, "disabling restart capability"); break; @@ -2194,10 +2353,13 @@ parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as) u_int32_t remote_as; u_int16_t len; u_int16_t afi; + u_int16_t gr_header; u_int8_t safi; u_int8_t aid; + u_int8_t gr_flags; u_int8_t capa_code; u_int8_t capa_len; + u_int8_t i; len = dlen; while (len > 0) { @@ -2249,8 +2411,50 @@ parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as) peer->capa.peer.refresh = 1; break; case CAPA_RESTART: - peer->capa.peer.restart = 1; - /* we don't care about the further restart capas yet */ + if (capa_len == 2) { + /* peer only supports EoR marker */ + peer->capa.peer.grestart.restart = 1; + peer->capa.peer.grestart.timeout = 0; + break; + } else if (capa_len % 4 != 2) { + log_peer_warnx(&peer->conf, + "parse_capabilities: " + "expect len 2 + x*4, len is %u", capa_len); + return (-1); + } + + memcpy(&gr_header, capa_val, sizeof(gr_header)); + gr_header = ntohs(gr_header); + peer->capa.peer.grestart.timeout = + gr_header & CAPA_GR_TIMEMASK; + if (peer->capa.peer.grestart.timeout == 0) { + log_peer_warnx(&peer->conf, + "graceful restart timeout is zero"); + return (-1); + } + + for (i = 2; i <= capa_len - 4; i += 4) { + memcpy(&afi, capa_val + i, sizeof(afi)); + afi = ntohs(afi); + memcpy(&safi, capa_val + i + 2, sizeof(safi)); + if (afi2aid(afi, safi, &aid) == -1) { + log_peer_warnx(&peer->conf, + "parse_capabilities: restart: AFI " + "%u, safi %u unknown", afi, safi); + return (-1); + } + memcpy(&gr_flags, capa_val + i + 3, + sizeof(gr_flags)); + peer->capa.peer.grestart.flags[aid] |= + CAPA_GR_PRESENT; + if (gr_flags & CAPA_GR_F_FLAG) + peer->capa.peer.grestart.flags[aid] |= + CAPA_GR_FORWARD; + if (gr_header & CAPA_GR_R_FLAG) + peer->capa.peer.grestart.flags[aid] |= + CAPA_GR_RESTART; + peer->capa.peer.grestart.restart = 2; + } break; case CAPA_AS4BYTE: if (capa_len != 4) { @@ -2293,11 +2497,40 @@ capa_neg_calc(struct peer *p) } else p->capa.neg.mp[i] = 0; } - /* if no MP capability present for default IPv4 unicast mode */ + /* if no MP capability present default to IPv4 unicast mode */ if (!hasmp) p->capa.neg.mp[AID_INET] = 1; - p->capa.neg.restart = p->capa.peer.restart; + /* + * graceful restart: only the peer capabilities are of interest here. + * It is necessary to compare the new values with the previous ones + * and act acordingly. AFI/SAFI that are not part in the MP capability + * are treated as not being present. + */ + + for (i = 0; i < AID_MAX; i++) { + /* disable GR if the AFI/SAFI is not present */ + if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && + p->capa.neg.mp[i] == 0) + p->capa.peer.grestart.flags[i] = 0; /* disable */ + /* look at current GR state and decide what to do */ + if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { + if (!(p->capa.peer.grestart.flags[i] & + CAPA_GR_FORWARD)) { + if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + log_peer_warnx(&p->conf, "graceful restart of " + "%s, not restarted, flushing", aid2str(i)); + } + p->capa.neg.grestart.flags[i] = + p->capa.peer.grestart.flags[i] | CAPA_GR_RESTARTING; + } else + p->capa.neg.grestart.flags[i] = + p->capa.peer.grestart.flags[i]; + } + p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; + p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; return (0); } @@ -2315,7 +2548,7 @@ session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) u_char *data; enum reconf_action reconf; int n, depend_ok, restricted; - u_int8_t errcode, subcode; + u_int8_t aid, errcode, subcode; if ((n = imsg_read(ibuf)) == -1) fatal("session_dispatch_imsg: imsg_read error"); @@ -2626,6 +2859,40 @@ session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) break; } break; + case IMSG_SESSION_RESTARTED: + if (idx != PFD_PIPE_ROUTE) + fatalx("update request not from RDE"); + if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) { + log_warnx("RDE sent invalid restart msg"); + break; + } + if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) { + log_warnx("no such peer: id=%u", + imsg.hdr.peerid); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_RESTARTED: bad AID"); + if (p->capa.neg.grestart.flags[aid] & + CAPA_GR_RESTARTING && + p->capa.neg.grestart.flags[aid] & + CAPA_GR_FORWARD) { + log_peer_warnx(&p->conf, + "graceful restart of %s finished", + aid2str(aid)); + p->capa.neg.grestart.flags[aid] &= + ~CAPA_GR_RESTARTING; + timer_stop(p, Timer_RestartTimeout); + + /* signal back to RDE to cleanup stale routes */ + if (imsg_compose(ibuf_rde, + IMSG_SESSION_RESTARTED, imsg.hdr.peerid, 0, + -1, &aid, sizeof(aid)) == -1) + fatal("imsg_compose: " + "IMSG_SESSION_RESTARTED"); + } + break; default: break; } @@ -2816,9 +3083,10 @@ session_up(struct peer *p) { struct session_up sup; - if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1, - &p->conf, sizeof(p->conf)) == -1) - fatalx("imsg_compose error"); + if (!session_graceful_is_restarting(p)) + if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1, + &p->conf, sizeof(p->conf)) == -1) + fatalx("imsg_compose error"); sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr); sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr); |