summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--usr.sbin/bgpd/bgpd.h28
-rw-r--r--usr.sbin/bgpd/parse.y6
-rw-r--r--usr.sbin/bgpd/printconf.c4
-rw-r--r--usr.sbin/bgpd/rde.c125
-rw-r--r--usr.sbin/bgpd/rde.h6
-rw-r--r--usr.sbin/bgpd/rde_rib.c32
-rw-r--r--usr.sbin/bgpd/session.c306
-rw-r--r--usr.sbin/bgpd/session.h5
8 files changed, 477 insertions, 35 deletions
diff --git a/usr.sbin/bgpd/bgpd.h b/usr.sbin/bgpd/bgpd.h
index 68b599ade5c..85736efe262 100644
--- a/usr.sbin/bgpd/bgpd.h
+++ b/usr.sbin/bgpd/bgpd.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: bgpd.h,v 1.270 2012/05/27 18:52:07 claudio Exp $ */
+/* $OpenBSD: bgpd.h,v 1.271 2012/09/12 05:56:22 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
@@ -132,6 +132,7 @@ extern const struct aid aid_vals[];
#define AID_INET6 2
#define AID_VPN_IPv4 3
#define AID_MAX 4
+#define AID_MIN 1 /* skip AID_UNSPEC since that is a dummy */
#define AID_VALS { \
/* afi, af, safii, name */ \
@@ -253,12 +254,25 @@ struct peer_auth {
};
struct capabilities {
- int8_t mp[AID_MAX]; /* multiprotocol extensions, RFC 4760 */
- int8_t refresh; /* route refresh, RFC 2918 */
- int8_t restart; /* graceful restart, RFC 4724 */
- int8_t as4byte; /* draft-ietf-idr-as4bytes-13 */
+ struct {
+ int16_t timeout; /* graceful restart timeout */
+ int8_t flags[AID_MAX]; /* graceful restart per AID flags */
+ int8_t restart; /* graceful restart, RFC 4724 */
+ } grestart;
+ int8_t mp[AID_MAX]; /* multiprotocol extensions, RFC 4760 */
+ int8_t refresh; /* route refresh, RFC 2918 */
+ int8_t as4byte; /* 4-byte ASnum, RFC 4893 */
};
+#define CAPA_GR_PRESENT 0x01
+#define CAPA_GR_RESTART 0x02
+#define CAPA_GR_FORWARD 0x04
+#define CAPA_GR_RESTARTING 0x08
+
+#define CAPA_GR_TIMEMASK 0x0fff
+#define CAPA_GR_R_FLAG 0x8000
+#define CAPA_GR_F_FLAG 0x80
+
struct peer_config {
struct bgpd_addr remote_addr;
struct bgpd_addr local_addr;
@@ -373,6 +387,9 @@ enum imsg_type {
IMSG_SESSION_ADD,
IMSG_SESSION_UP,
IMSG_SESSION_DOWN,
+ IMSG_SESSION_STALE,
+ IMSG_SESSION_FLUSH,
+ IMSG_SESSION_RESTARTED,
IMSG_MRT_OPEN,
IMSG_MRT_REOPEN,
IMSG_MRT_CLOSE,
@@ -550,6 +567,7 @@ struct ctl_neighbor {
#define F_PREF_ACTIVE 0x02
#define F_PREF_INTERNAL 0x04
#define F_PREF_ANNOUNCE 0x08
+#define F_PREF_STALE 0x10
struct ctl_show_rib {
struct bgpd_addr true_nexthop;
diff --git a/usr.sbin/bgpd/parse.y b/usr.sbin/bgpd/parse.y
index c712abe001f..108e1ff8c18 100644
--- a/usr.sbin/bgpd/parse.y
+++ b/usr.sbin/bgpd/parse.y
@@ -1,4 +1,4 @@
-/* $OpenBSD: parse.y,v 1.262 2012/07/13 15:25:37 claudio Exp $ */
+/* $OpenBSD: parse.y,v 1.263 2012/09/12 05:56:22 claudio Exp $ */
/*
* Copyright (c) 2002, 2003, 2004 Henning Brauer <henning@openbsd.org>
@@ -1038,7 +1038,7 @@ peeropts : REMOTEAS as4number {
curpeer->conf.capabilities.refresh = $3;
}
| ANNOUNCE RESTART yesno {
- curpeer->conf.capabilities.restart = $3;
+ curpeer->conf.capabilities.grestart.restart = $3;
}
| ANNOUNCE AS4BYTE yesno {
curpeer->conf.capabilities.as4byte = $3;
@@ -3019,7 +3019,7 @@ alloc_peer(void)
for (i = 0; i < AID_MAX; i++)
p->conf.capabilities.mp[i] = -1;
p->conf.capabilities.refresh = 1;
- p->conf.capabilities.restart = 0;
+ p->conf.capabilities.grestart.restart = 0;
p->conf.capabilities.as4byte = 1;
p->conf.local_as = conf->as;
p->conf.local_short_as = conf->short_as;
diff --git a/usr.sbin/bgpd/printconf.c b/usr.sbin/bgpd/printconf.c
index 6599761c638..88ffb052197 100644
--- a/usr.sbin/bgpd/printconf.c
+++ b/usr.sbin/bgpd/printconf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: printconf.c,v 1.86 2011/09/17 16:29:44 claudio Exp $ */
+/* $OpenBSD: printconf.c,v 1.87 2012/09/12 05:56:22 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
@@ -377,7 +377,7 @@ print_peer(struct peer_config *p, struct bgpd_config *conf, const char *c)
printf("%s\tannounce capabilities no\n", c);
if (p->capabilities.refresh == 0)
printf("%s\tannounce refresh no\n", c);
- if (p->capabilities.restart == 1)
+ if (p->capabilities.grestart.restart == 1)
printf("%s\tannounce restart yes\n", c);
if (p->capabilities.as4byte == 0)
printf("%s\tannounce as4byte no\n", c);
diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c
index 8459ac67739..cd4456c4a8a 100644
--- a/usr.sbin/bgpd/rde.c
+++ b/usr.sbin/bgpd/rde.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: rde.c,v 1.317 2012/08/12 14:24:56 claudio Exp $ */
+/* $OpenBSD: rde.c,v 1.318 2012/09/12 05:56:22 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
@@ -101,6 +101,9 @@ struct rde_peer *peer_add(u_int32_t, struct peer_config *);
struct rde_peer *peer_get(u_int32_t);
void peer_up(u_int32_t, struct session_up *);
void peer_down(u_int32_t);
+void peer_flush(struct rde_peer *, u_int8_t);
+void peer_stale(u_int32_t, u_int8_t);
+void peer_recv_eor(struct rde_peer *, u_int8_t);
void peer_dump(u_int32_t, u_int8_t);
void peer_send_eor(struct rde_peer *, u_int8_t);
@@ -407,6 +410,47 @@ rde_dispatch_imsg_session(struct imsgbuf *ibuf)
case IMSG_SESSION_DOWN:
peer_down(imsg.hdr.peerid);
break;
+ case IMSG_SESSION_STALE:
+ if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
+ log_warnx("rde_dispatch: wrong imsg len");
+ break;
+ }
+ memcpy(&aid, imsg.data, sizeof(aid));
+ if (aid >= AID_MAX)
+ fatalx("IMSG_SESSION_STALE: bad AID");
+ peer_stale(imsg.hdr.peerid, aid);
+ break;
+ case IMSG_SESSION_FLUSH:
+ if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
+ log_warnx("rde_dispatch: wrong imsg len");
+ break;
+ }
+ memcpy(&aid, imsg.data, sizeof(aid));
+ if (aid >= AID_MAX)
+ fatalx("IMSG_SESSION_FLUSH: bad AID");
+ if ((peer = peer_get(imsg.hdr.peerid)) == NULL) {
+ log_warnx("rde_dispatch: unknown peer id %d",
+ imsg.hdr.peerid);
+ break;
+ }
+ peer_flush(peer, aid);
+ break;
+ case IMSG_SESSION_RESTARTED:
+ if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
+ log_warnx("rde_dispatch: wrong imsg len");
+ break;
+ }
+ memcpy(&aid, imsg.data, sizeof(aid));
+ if (aid >= AID_MAX)
+ fatalx("IMSG_SESSION_RESTARTED: bad AID");
+ if ((peer = peer_get(imsg.hdr.peerid)) == NULL) {
+ log_warnx("rde_dispatch: unknown peer id %d",
+ imsg.hdr.peerid);
+ break;
+ }
+ if (peer->staletime[aid])
+ peer_flush(peer, aid);
+ break;
case IMSG_REFRESH:
if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
log_warnx("rde_dispatch: wrong imsg len");
@@ -559,10 +603,14 @@ badnet:
peer->prefix_rcvd_update;
p.stats.prefix_rcvd_withdraw =
peer->prefix_rcvd_withdraw;
+ p.stats.prefix_rcvd_eor =
+ peer->prefix_rcvd_eor;
p.stats.prefix_sent_update =
peer->prefix_sent_update;
p.stats.prefix_sent_withdraw =
peer->prefix_sent_withdraw;
+ p.stats.prefix_sent_eor =
+ peer->prefix_sent_eor;
}
imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0,
imsg.hdr.pid, -1, &p, sizeof(struct peer));
@@ -1020,6 +1068,10 @@ rde_update_dispatch(struct imsg *imsg)
ERR_UPD_ATTRLIST, NULL, 0);
return (-1);
}
+ if (withdrawn_len == 0) {
+ /* EoR marker */
+ peer_recv_eor(peer, AID_INET);
+ }
return (0);
}
@@ -1050,6 +1102,11 @@ rde_update_dispatch(struct imsg *imsg)
goto done;
}
+ if ((asp->flags & ~F_ATTR_MP_UNREACH) == 0 && mplen == 0) {
+ /* EoR marker */
+ peer_recv_eor(peer, aid);
+ }
+
switch (aid) {
case AID_INET6:
while (mplen > 0) {
@@ -2165,6 +2222,7 @@ rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags)
struct ibuf *wbuf;
struct attr *a;
void *bp;
+ time_t staletime; /* peer's stale mark for the prefix's AID, 0 if none */
u_int8_t l;
bzero(&rib, sizeof(rib));
@@ -2201,6 +2259,9 @@ rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags)
rib.flags |= F_PREF_ELIGIBLE;
if (asp->flags & F_ATTR_LOOP)
rib.flags &= ~F_PREF_ELIGIBLE;
+ staletime = asp->peer->staletime[p->prefix->aid];
+ if (staletime && p->lastchange <= staletime)
+ rib.flags |= F_PREF_STALE;
rib.aspath_len = aspath_length(asp->aspath);
if ((wbuf = imsg_create(ibuf_se_ctl, IMSG_CTL_SHOW_RIB, 0, pid,
@@ -3163,7 +3224,8 @@ peer_up(u_int32_t id, struct session_up *sup)
return;
}
- if (peer->state != PEER_DOWN && peer->state != PEER_NONE)
+ if (peer->state != PEER_DOWN && peer->state != PEER_NONE &&
+ peer->state != PEER_UP)
fatalx("peer_up: bad state");
peer->remote_bgpid = ntohl(sup->remote_bgpid);
peer->short_as = sup->short_as;
@@ -3220,6 +3282,50 @@ peer_down(u_int32_t id)
free(peer);
}
+/*
+ * Flush all routes of the given AID that are not newer than the peer's
+ * staletime[aid]. If that staletime is 0 all routes of the AID are flushed.
+ */
+void
+peer_flush(struct rde_peer *peer, u_int8_t aid)
+{
+ struct rde_aspath *asp, *nasp;
+
+ /* walk the per peer path list; the successor is saved before each removal */
+ for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = nasp) {
+ nasp = LIST_NEXT(asp, peer_l);
+ path_remove_stale(asp, aid);
+ }
+
+ /* pftable updates were sent from path_remove_stale(), commit them here */
+ rde_send_pftable_commit();
+
+ /* everything stale is gone, no need to keep the staletime mark */
+ peer->staletime[aid] = 0;
+}
+/* Mark the routes of one AID of peer `id' as stale as of the current time. */
+void
+peer_stale(u_int32_t id, u_int8_t aid)
+{
+ struct rde_peer *peer;
+ time_t now;
+
+ peer = peer_get(id);
+ if (peer == NULL) {
+ log_warnx("peer_stale: unknown peer id %d", id);
+ return;
+ }
+
+ if (peer->staletime[aid]) /* an older stale mark is still set: flush first */
+ peer_flush(peer, aid);
+ peer->staletime[aid] = now = time(NULL);
+
+ /* wait until time() has advanced so new prefixes get a higher timestamp */
+ do {
+ sleep(1); /* NOTE(review): blocks the calling process for at least 1s */
+ } while (now >= time(NULL));
+}
+
void
peer_dump(u_int32_t id, u_int8_t aid)
{
@@ -3235,17 +3341,30 @@ peer_dump(u_int32_t id, u_int8_t aid)
up_generate_default(rules_l, peer, aid);
else
rib_dump(&ribs[peer->ribid], rde_up_dump_upcall, peer, aid);
- if (peer->capa.restart)
+ if (peer->capa.grestart.restart)
up_generate_marker(peer, aid);
}
/* End-of-RIB marker, RFC 4724 */
void
+peer_recv_eor(struct rde_peer *peer, u_int8_t aid)
+{ /* account for an End-of-RIB marker received from peer for aid */
+ peer->prefix_rcvd_eor++;
+
+ /* Notify the SE first (it owns the timeout) to avoid a race with it. */
+ if (imsg_compose(ibuf_se, IMSG_SESSION_RESTARTED, peer->conf.id,
+ 0, -1, &aid, sizeof(aid)) == -1)
+ fatal("imsg_compose error");
+}
+
+void
peer_send_eor(struct rde_peer *peer, u_int8_t aid)
{
u_int16_t afi;
u_int8_t safi;
+ peer->prefix_sent_eor++;
+
if (aid == AID_INET) {
u_char null[4];
diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h
index 1d489c055b4..cfbceac38f7 100644
--- a/usr.sbin/bgpd/rde.h
+++ b/usr.sbin/bgpd/rde.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: rde.h,v 1.143 2012/08/12 14:24:56 claudio Exp $ */
+/* $OpenBSD: rde.h,v 1.144 2012/09/12 05:56:22 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> and
@@ -59,10 +59,13 @@ struct rde_peer {
struct uplist_attr updates[AID_MAX];
struct uplist_prefix withdraws[AID_MAX];
struct capabilities capa;
+ time_t staletime[AID_MAX];
u_int64_t prefix_rcvd_update;
u_int64_t prefix_rcvd_withdraw;
+ u_int64_t prefix_rcvd_eor;
u_int64_t prefix_sent_update;
u_int64_t prefix_sent_withdraw;
+ u_int64_t prefix_sent_eor;
u_int32_t prefix_cnt; /* # of prefixes */
u_int32_t remote_bgpid; /* host byte order! */
u_int32_t up_pcnt;
@@ -430,6 +433,7 @@ int path_update(struct rib *, struct rde_peer *,
int path_compare(struct rde_aspath *, struct rde_aspath *);
struct rde_aspath *path_lookup(struct rde_aspath *, struct rde_peer *);
void path_remove(struct rde_aspath *);
+void path_remove_stale(struct rde_aspath *, u_int8_t);
void path_destroy(struct rde_aspath *);
int path_empty(struct rde_aspath *);
struct rde_aspath *path_copy(struct rde_aspath *);
diff --git a/usr.sbin/bgpd/rde_rib.c b/usr.sbin/bgpd/rde_rib.c
index b26ace1ab72..49fd20ccffa 100644
--- a/usr.sbin/bgpd/rde_rib.c
+++ b/usr.sbin/bgpd/rde_rib.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: rde_rib.c,v 1.133 2012/07/01 11:55:13 sthen Exp $ */
+/* $OpenBSD: rde_rib.c,v 1.134 2012/09/12 05:56:22 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org>
@@ -505,6 +505,36 @@ path_remove(struct rde_aspath *asp)
}
}
+/* Remove the prefixes of this path that belong to aid and are stale
+ (lastchange <= staletime); if staletime is 0 remove all prefixes of aid. */
+void
+path_remove_stale(struct rde_aspath *asp, u_int8_t aid)
+{
+ struct prefix *p, *np;
+ time_t staletime;
+
+ staletime = asp->peer->staletime[aid];
+ for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = np) {
+ np = LIST_NEXT(p, path_l); /* fetched before p may be destroyed below */
+ if (p->prefix->aid != aid)
+ continue;
+
+ if (staletime && p->lastchange > staletime) /* changed after the mark: keep */
+ continue;
+
+ if (asp->pftableid) {
+ struct bgpd_addr addr;
+
+ pt_getaddr(p->prefix, &addr);
+ /* Only sent here; the commit is done in peer_flush() */
+ rde_send_pftable(p->aspath->pftableid, &addr,
+ p->prefix->prefixlen, 1);
+ }
+ prefix_destroy(p);
+ }
+}
+
+
/* this function is only called by prefix_remove and path_remove */
void
path_destroy(struct rde_aspath *asp)
diff --git a/usr.sbin/bgpd/session.c b/usr.sbin/bgpd/session.c
index 22d863be1ec..878f3eb1611 100644
--- a/usr.sbin/bgpd/session.c
+++ b/usr.sbin/bgpd/session.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: session.c,v 1.323 2012/07/11 09:43:10 sthen Exp $ */
+/* $OpenBSD: session.c,v 1.324 2012/09/12 05:56:22 claudio Exp $ */
/*
* Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
@@ -69,6 +69,7 @@ void session_tcp_established(struct peer *);
void session_capa_ann_none(struct peer *);
int session_capa_add(struct ibuf *, u_int8_t, u_int8_t);
int session_capa_add_mp(struct ibuf *, u_int8_t);
+int session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t);
struct bgp_msg *session_newmsg(enum msg_type, u_int16_t);
int session_sendmsg(struct bgp_msg *, struct peer *);
void session_open(struct peer *);
@@ -77,6 +78,9 @@ void session_update(u_int32_t, void *, size_t);
void session_notification(struct peer *, u_int8_t, u_int8_t, void *,
ssize_t);
void session_rrefresh(struct peer *, u_int8_t);
+int session_graceful_restart(struct peer *);
+int session_graceful_is_restarting(struct peer *);
+int session_graceful_stop(struct peer *);
int session_dispatch_msg(struct pollfd *, struct peer *);
int session_process_msg(struct peer *);
int parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *);
@@ -437,6 +441,10 @@ session_main(int pipe_m2s[2], int pipe_s2r[2], int pipe_m2r[2],
p->state == STATE_ESTABLISHED)
session_demote(p, -1);
break;
+ case Timer_RestartTimeout:
+ timer_stop(p, Timer_RestartTimeout);
+ session_graceful_stop(p);
+ break;
default:
fatalx("King Bula lost in time");
}
@@ -941,14 +949,24 @@ change_state(struct peer *peer, enum session_state state,
free(peer->rbuf);
peer->rbuf = NULL;
bzero(&peer->capa.peer, sizeof(peer->capa.peer));
- if (peer->state == STATE_ESTABLISHED)
- session_down(peer);
+
if (event != EVNT_STOP) {
timer_set(peer, Timer_IdleHold, peer->IdleHoldTime);
if (event != EVNT_NONE &&
peer->IdleHoldTime < MAX_IDLE_HOLD/2)
peer->IdleHoldTime *= 2;
}
+ if (peer->state == STATE_ESTABLISHED) {
+ if (peer->capa.neg.grestart.restart == 2 &&
+ (event == EVNT_CON_CLOSED ||
+ event == EVNT_CON_FATAL)) {
+ /* don't punish graceful restart */
+ timer_set(peer, Timer_IdleHold, 0);
+ peer->IdleHoldTime /= 2;
+ session_graceful_restart(peer);
+ } else
+ session_down(peer);
+ }
if (peer->state == STATE_NONE ||
peer->state == STATE_ESTABLISHED) {
/* initialize capability negotiation structures */
@@ -959,6 +977,20 @@ change_state(struct peer *peer, enum session_state state,
}
break;
case STATE_CONNECT:
+ if (peer->state == STATE_ESTABLISHED &&
+ peer->capa.neg.grestart.restart == 2) {
+ /* do the graceful restart dance */
+ session_graceful_restart(peer);
+ peer->holdtime = INTERVAL_HOLD_INITIAL;
+ timer_stop(peer, Timer_ConnectRetry);
+ timer_stop(peer, Timer_Keepalive);
+ timer_stop(peer, Timer_Hold);
+ timer_stop(peer, Timer_IdleHold);
+ timer_stop(peer, Timer_IdleHoldReset);
+ session_close_connection(peer);
+ msgbuf_clear(&peer->wbuf);
+ bzero(&peer->capa.peer, sizeof(peer->capa.peer));
+ }
break;
case STATE_ACTIVE:
break;
@@ -1032,6 +1064,7 @@ session_accept(int listenfd)
}
}
+open:
if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
log_peer_warnx(&p->conf,
"ipsec or md5sig configured but not available");
@@ -1064,6 +1097,13 @@ session_accept(int listenfd)
}
session_socket_blockmode(connfd, BM_NONBLOCK);
bgp_fsm(p, EVNT_CON_OPEN);
+ return;
+ } else if (p != NULL && p->state == STATE_ESTABLISHED &&
+ p->capa.neg.grestart.restart == 2) {
+ /* first do the graceful restart dance */
+ change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
+ /* then do part of the open dance */
+ goto open;
} else {
log_conn_attempt(p, (struct sockaddr *)&cliaddr);
close(connfd);
@@ -1290,6 +1330,30 @@ session_capa_add_mp(struct ibuf *buf, u_int8_t aid)
return (errs);
}
+int
+session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid)
+{ /* append one <AFI, SAFI, flags> graceful restart entry for aid to b */
+ u_int errs = 0; /* accumulates the ibuf_add() results, returned at the end */
+ u_int16_t afi;
+ u_int8_t flags, safi;
+
+ if (aid2afi(aid, &afi, &safi)) {
+ log_warn("session_capa_add_gr: bad AID");
+ return (1);
+ }
+ if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING)
+ flags = CAPA_GR_F_FLAG; /* F flag only while this AID is restarting */
+ else
+ flags = 0;
+
+ afi = htons(afi); /* AFI is written in network byte order */
+ errs += ibuf_add(b, &afi, sizeof(afi));
+ errs += ibuf_add(b, &safi, sizeof(safi));
+ errs += ibuf_add(b, &flags, sizeof(flags));
+
+ return (errs);
+}
+
struct bgp_msg *
session_newmsg(enum msg_type msgtype, u_int16_t len)
{
@@ -1350,6 +1414,7 @@ session_open(struct peer *p)
u_int16_t len;
u_int8_t i, op_type, optparamlen = 0;
int errs = 0;
+ int mpcapa = 0;
if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) -
@@ -1363,20 +1428,51 @@ session_open(struct peer *p)
if (p->capa.ann.mp[i]) { /* 4 bytes data */
errs += session_capa_add(opb, CAPA_MP, 4);
errs += session_capa_add_mp(opb, i);
+ mpcapa++;
}
/* route refresh, RFC 2918 */
if (p->capa.ann.refresh) /* no data */
errs += session_capa_add(opb, CAPA_REFRESH, 0);
- /* End-of-RIB marker, RFC 4724 */
- if (p->capa.ann.restart) { /* 2 bytes data */
- u_char c[2];
+ /* graceful restart and End-of-RIB marker, RFC 4724 */
+ if (p->capa.ann.grestart.restart) {
+ int rst = 0;
+ u_int16_t hdr;
+ u_int8_t grlen;
+
+ if (mpcapa) {
+ grlen = 2 + 4 * mpcapa;
+ for (i = 0; i < AID_MAX; i++) {
+ if (p->capa.neg.grestart.flags[i] &
+ CAPA_GR_RESTARTING)
+ rst++;
+ }
+ } else { /* AID_INET */
+ grlen = 2 + 4;
+ if (p->capa.neg.grestart.flags[AID_INET] &
+ CAPA_GR_RESTARTING)
+ rst++;
+ }
+
+ hdr = conf->holdtime; /* default timeout */
+ /* if client does graceful restart don't set R flag */
+ if (!rst)
+ hdr |= CAPA_GR_R_FLAG;
+ hdr = htons(hdr);
+
+ errs += session_capa_add(opb, CAPA_RESTART, grlen);
+ errs += ibuf_add(opb, &hdr, sizeof(hdr));
- c[0] = 0x80; /* we're always restarting */
- c[1] = 0;
- errs += session_capa_add(opb, CAPA_RESTART, 2);
- errs += ibuf_add(opb, &c, 2);
+ if (mpcapa) {
+ for (i = 0; i < AID_MAX; i++) {
+ if (p->capa.ann.mp[i]) {
+ errs += session_capa_add_gr(p, opb, i);
+ }
+ }
+ } else { /* AID_INET */
+ errs += session_capa_add_gr(p, opb, AID_INET);
+ }
}
/* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
@@ -1583,6 +1679,69 @@ session_rrefresh(struct peer *p, u_int8_t aid)
}
int
+session_graceful_restart(struct peer *p)
+{ /* begin graceful restart handling for p; 0 on success, -1 if imsg_compose() fails */
+ u_int8_t i;
+
+ timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout);
+
+ for (i = 0; i < AID_MAX; i++) {
+ if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
+ /* GR negotiated for this AID: ask the RDE to mark its routes stale */
+ if (imsg_compose(ibuf_rde, IMSG_SESSION_STALE,
+ p->conf.id, 0, -1, &i, sizeof(i)) == -1)
+ return (-1);
+ log_peer_warnx(&p->conf,
+ "graceful restart of %s, keeping routes",
+ aid2str(i));
+ p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
+ } else if (p->capa.neg.mp[i]) {
+ /* AID in use but without GR: ask the RDE to flush its routes */
+ if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
+ p->conf.id, 0, -1, &i, sizeof(i)) == -1)
+ return (-1);
+ log_peer_warnx(&p->conf,
+ "graceful restart of %s, flushing routes",
+ aid2str(i));
+ }
+ }
+ return (0);
+}
+/* Return 1 if any AID of p has CAPA_GR_RESTARTING set, 0 otherwise. */
+int
+session_graceful_is_restarting(struct peer *p)
+{
+ u_int8_t i;
+
+ for (i = 0; i < AID_MAX; i++)
+ if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING)
+ return (1);
+ return (0);
+}
+/* End the graceful restart of p: flush where needed and clear RESTARTING. */
+int
+session_graceful_stop(struct peer *p)
+{
+ u_int8_t i;
+
+ for (i = 0; i < AID_MAX; i++) {
+ /*
+ * Only flush if this AID is still marked as restarting and the
+ * peer indicated that it holds the forwarding state. In all other
+ * cases the session was already flushed when the session came up.
+ */
+ if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING &&
+ p->capa.neg.grestart.flags[i] & CAPA_GR_FORWARD) {
+ log_peer_warnx(&p->conf, "graceful restart of %s, "
+ "time-out, flushing", aid2str(i));
+ if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
+ p->conf.id, 0, -1, &i, sizeof(i)) == -1)
+ return (-1); /* NOTE(review): this and later AIDs keep RESTARTING set */
+ }
+ p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
+ }
+ return (0);
+}
+
+int
session_dispatch_msg(struct pollfd *pfd, struct peer *p)
{
ssize_t n;
@@ -2156,7 +2315,7 @@ parse_notification(struct peer *peer)
"disabling route refresh capability");
break;
case CAPA_RESTART:
- peer->capa.ann.restart = 0;
+ peer->capa.ann.grestart.restart = 0;
log_peer_warnx(&peer->conf,
"disabling restart capability");
break;
@@ -2194,10 +2353,13 @@ parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
u_int32_t remote_as;
u_int16_t len;
u_int16_t afi;
+ u_int16_t gr_header;
u_int8_t safi;
u_int8_t aid;
+ u_int8_t gr_flags;
u_int8_t capa_code;
u_int8_t capa_len;
+ u_int8_t i;
len = dlen;
while (len > 0) {
@@ -2249,8 +2411,50 @@ parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
peer->capa.peer.refresh = 1;
break;
case CAPA_RESTART:
- peer->capa.peer.restart = 1;
- /* we don't care about the further restart capas yet */
+ if (capa_len == 2) {
+ /* peer only supports EoR marker */
+ peer->capa.peer.grestart.restart = 1;
+ peer->capa.peer.grestart.timeout = 0;
+ break;
+ } else if (capa_len % 4 != 2) {
+ log_peer_warnx(&peer->conf,
+ "parse_capabilities: "
+ "expect len 2 + x*4, len is %u", capa_len);
+ return (-1);
+ }
+
+ memcpy(&gr_header, capa_val, sizeof(gr_header));
+ gr_header = ntohs(gr_header);
+ peer->capa.peer.grestart.timeout =
+ gr_header & CAPA_GR_TIMEMASK;
+ if (peer->capa.peer.grestart.timeout == 0) {
+ log_peer_warnx(&peer->conf,
+ "graceful restart timeout is zero");
+ return (-1);
+ }
+
+ for (i = 2; i <= capa_len - 4; i += 4) {
+ memcpy(&afi, capa_val + i, sizeof(afi));
+ afi = ntohs(afi);
+ memcpy(&safi, capa_val + i + 2, sizeof(safi));
+ if (afi2aid(afi, safi, &aid) == -1) {
+ log_peer_warnx(&peer->conf,
+ "parse_capabilities: restart: AFI "
+ "%u, safi %u unknown", afi, safi);
+ return (-1);
+ }
+ memcpy(&gr_flags, capa_val + i + 3,
+ sizeof(gr_flags));
+ peer->capa.peer.grestart.flags[aid] |=
+ CAPA_GR_PRESENT;
+ if (gr_flags & CAPA_GR_F_FLAG)
+ peer->capa.peer.grestart.flags[aid] |=
+ CAPA_GR_FORWARD;
+ if (gr_header & CAPA_GR_R_FLAG)
+ peer->capa.peer.grestart.flags[aid] |=
+ CAPA_GR_RESTART;
+ peer->capa.peer.grestart.restart = 2;
+ }
break;
case CAPA_AS4BYTE:
if (capa_len != 4) {
@@ -2293,11 +2497,40 @@ capa_neg_calc(struct peer *p)
} else
p->capa.neg.mp[i] = 0;
}
- /* if no MP capability present for default IPv4 unicast mode */
+ /* if no MP capability present default to IPv4 unicast mode */
if (!hasmp)
p->capa.neg.mp[AID_INET] = 1;
- p->capa.neg.restart = p->capa.peer.restart;
+ /*
+ * graceful restart: only the peer capabilities are of interest here.
+ * It is necessary to compare the new values with the previous ones
+ * and act accordingly. AFI/SAFI that are not part of the MP capability
+ * are treated as not being present.
+ */
+
+ for (i = 0; i < AID_MAX; i++) {
+ /* disable GR if the AFI/SAFI is not present */
+ if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
+ p->capa.neg.mp[i] == 0)
+ p->capa.peer.grestart.flags[i] = 0; /* disable */
+ /* look at current GR state and decide what to do */
+ if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
+ if (!(p->capa.peer.grestart.flags[i] &
+ CAPA_GR_FORWARD)) {
+ if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
+ p->conf.id, 0, -1, &i, sizeof(i)) == -1)
+ return (-1);
+ log_peer_warnx(&p->conf, "graceful restart of "
+ "%s, not restarted, flushing", aid2str(i));
+ }
+ p->capa.neg.grestart.flags[i] =
+ p->capa.peer.grestart.flags[i] | CAPA_GR_RESTARTING;
+ } else
+ p->capa.neg.grestart.flags[i] =
+ p->capa.peer.grestart.flags[i];
+ }
+ p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
+ p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
return (0);
}
@@ -2315,7 +2548,7 @@ session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
u_char *data;
enum reconf_action reconf;
int n, depend_ok, restricted;
- u_int8_t errcode, subcode;
+ u_int8_t aid, errcode, subcode;
if ((n = imsg_read(ibuf)) == -1)
fatal("session_dispatch_imsg: imsg_read error");
@@ -2626,6 +2859,40 @@ session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
break;
}
break;
+ case IMSG_SESSION_RESTARTED:
+ if (idx != PFD_PIPE_ROUTE)
+ fatalx("update request not from RDE");
+ if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) {
+ log_warnx("RDE sent invalid restart msg");
+ break;
+ }
+ if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
+ log_warnx("no such peer: id=%u",
+ imsg.hdr.peerid);
+ break;
+ }
+ memcpy(&aid, imsg.data, sizeof(aid));
+ if (aid >= AID_MAX)
+ fatalx("IMSG_SESSION_RESTARTED: bad AID");
+ if (p->capa.neg.grestart.flags[aid] &
+ CAPA_GR_RESTARTING &&
+ p->capa.neg.grestart.flags[aid] &
+ CAPA_GR_FORWARD) {
+ log_peer_warnx(&p->conf,
+ "graceful restart of %s finished",
+ aid2str(aid));
+ p->capa.neg.grestart.flags[aid] &=
+ ~CAPA_GR_RESTARTING;
+ timer_stop(p, Timer_RestartTimeout);
+
+ /* signal back to RDE to cleanup stale routes */
+ if (imsg_compose(ibuf_rde,
+ IMSG_SESSION_RESTARTED, imsg.hdr.peerid, 0,
+ -1, &aid, sizeof(aid)) == -1)
+ fatal("imsg_compose: "
+ "IMSG_SESSION_RESTARTED");
+ }
+ break;
default:
break;
}
@@ -2816,9 +3083,10 @@ session_up(struct peer *p)
{
struct session_up sup;
- if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1,
- &p->conf, sizeof(p->conf)) == -1)
- fatalx("imsg_compose error");
+ if (!session_graceful_is_restarting(p))
+ if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1,
+ &p->conf, sizeof(p->conf)) == -1)
+ fatalx("imsg_compose error");
sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr);
sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr);
diff --git a/usr.sbin/bgpd/session.h b/usr.sbin/bgpd/session.h
index 5b300aa6148..fcd63884458 100644
--- a/usr.sbin/bgpd/session.h
+++ b/usr.sbin/bgpd/session.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: session.h,v 1.113 2012/04/12 17:26:09 claudio Exp $ */
+/* $OpenBSD: session.h,v 1.114 2012/09/12 05:56:22 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
@@ -162,8 +162,10 @@ struct peer_stats {
u_int64_t msg_sent_rrefresh;
u_int64_t prefix_rcvd_update;
u_int64_t prefix_rcvd_withdraw;
+ u_int64_t prefix_rcvd_eor;
u_int64_t prefix_sent_update;
u_int64_t prefix_sent_withdraw;
+ u_int64_t prefix_sent_eor;
time_t last_updown;
time_t last_read;
u_int32_t prefix_cnt;
@@ -179,6 +181,7 @@ enum Timer {
Timer_IdleHold,
Timer_IdleHoldReset,
Timer_CarpUndemote,
+ Timer_RestartTimeout,
Timer_Max
};