/*	$OpenBSD: kroute.c,v 1.61 2016/06/18 01:25:53 renato Exp $ */

/*
 * Copyright (c) 2015, 2016 Renato Westphal
 * Copyright (c) 2009 Michele Marchetto
 * Copyright (c) 2004 Esben Norby
 * Copyright (c) 2003, 2004 Henning Brauer
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * NOTE(review): the following directives carry no header name in this copy
 * of the file; the operands look like they were lost in extraction and have
 * to be restored from the upstream source before this file can compile.
 */
#include #include #include #include #include #include #include #include #include #include #include #include #include
#include "ldpd.h"
#include "log.h"

/* Process-wide state of the kernel routing interface. */
struct {
	uint32_t		rtseq;		/* copied into rtm_seq of each message sent */
	pid_t			pid;		/* our pid, set in kr_init() */
	int			fib_sync;	/* 0: send_rtmsg_v4() sends nothing */
	int			fd;		/* AF_ROUTE socket */
	int			ioctl_fd;	/* AF_INET datagram socket opened in kr_init() */
	struct event		ev;		/* read event registered on fd */
} kr_state;

/* One nexthop of a route; lives on its priority's "nexthops" list. */
struct kroute_node {
	TAILQ_ENTRY(kroute_node)	 entry;
	struct kroute_priority		*kprio;		/* back pointer */
	struct kroute			 r;
};

/* All nexthops of one prefix that share the same route priority. */
struct kroute_priority {
	TAILQ_ENTRY(kroute_priority)	 entry;
	struct kroute_prefix		*kp;		/* back pointer */
	uint8_t				 priority;
	TAILQ_HEAD(, kroute_node)	 nexthops;
};

/*
 * One prefix in the route tree, keyed by (af, prefix, prefixlen) as
 * implemented by kroute_compare().
 */
struct kroute_prefix {
	RB_ENTRY(kroute_prefix)		 entry;
	int				 af;
	union ldpd_addr			 prefix;
	uint8_t				 prefixlen;
	TAILQ_HEAD(plist, kroute_priority) priorities;
};
RB_HEAD(kroute_tree, kroute_prefix);
RB_PROTOTYPE(kroute_tree, kroute_prefix, entry, kroute_compare)

/* One address configured on an interface. */
struct kif_addr {
	TAILQ_ENTRY(kif_addr)	 entry;
	struct kaddr		 a;
};

/* One interface in the interface tree, keyed by k.ifindex (kif_compare()). */
struct kif_node {
	RB_ENTRY(kif_node)	 entry;
	TAILQ_HEAD(, kif_addr)	 addrs;
	struct kif		 k;
	struct kpw		*kpw;	/* set by kmpw_set(), cleared by kmpw_unset() */
};
RB_HEAD(kif_tree, kif_node);
RB_PROTOTYPE(kif_tree, kif_node, entry, kif_compare)
/* routing socket event callback and redistribution helpers */
static void		 kr_dispatch_msg(int, short, void *);
static void		 kr_redist_remove(struct kroute *);
static int		 kr_redist_eval(struct kroute *);
static void		 kr_redistribute(struct kroute_prefix *);

/* route tree management */
static __inline int	 kroute_compare(struct kroute_prefix *,
			    struct kroute_prefix *);
static struct kroute_prefix	*kroute_find_prefix(int, union ldpd_addr *,
			    uint8_t);
static struct kroute_priority	*kroute_find_prio(struct kroute_prefix *,
			    uint8_t);
static struct kroute_node	*kroute_find_gw(struct kroute_priority *,
			    union ldpd_addr *);
static int		 kroute_insert(struct kroute *);
static int		 kroute_uninstall(struct kroute_node *);
static int		 kroute_remove(struct kroute *);
static void		 kroute_clear(void);

/* interface tree management */
static __inline int	 kif_compare(struct kif_node *, struct kif_node *);
static struct kif_node	*kif_find(unsigned short);
static struct kif_node	*kif_insert(unsigned short);
static int		 kif_remove(struct kif_node *);
static struct kif_node	*kif_update(unsigned short, int, struct if_data *,
			    struct sockaddr_dl *, int *);

/* lookups and small helpers */
static struct kroute_priority	*kroute_match(int, union ldpd_addr *);
static uint8_t		 prefixlen_classful(in_addr_t);
static void		 get_rtaddrs(int, struct sockaddr *,
			    struct sockaddr **);

/* interface and address event handlers */
static void		 if_change(unsigned short, int, struct if_data *,
			    struct sockaddr_dl *);
static void		 if_newaddr(unsigned short, struct sockaddr *,
			    struct sockaddr *, struct sockaddr *);
static void		 if_deladdr(unsigned short, struct sockaddr *,
			    struct sockaddr *, struct sockaddr *);
static void		 if_announce(void *);

/* routing socket I/O */
static int		 send_rtmsg(int, int, struct kroute *, int);
static int		 send_rtmsg_v4(int fd, int, struct kroute *, int);
static int		 send_rtmsg_v6(int fd, int, struct kroute *, int);
static int		 fetchtable(void);
static int		 fetchifs(void);
static int		 dispatch_rtmsg(void);
static int		 rtmsg_process(char *, size_t);
static int		 rtmsg_process_route(struct rt_msghdr *,
			    struct sockaddr *[RTAX_MAX]);

/* pseudowire interface configuration */
static int		 kmpw_install(const char *, struct kpw *);
static int		 kmpw_uninstall(const char *);
RB_GENERATE(kroute_tree, kroute_prefix, entry, kroute_compare) RB_GENERATE(kif_tree, kif_node, entry, kif_compare) static struct kroute_tree krt = RB_INITIALIZER(&krt); static struct kif_tree kit = RB_INITIALIZER(&kit); int kif_init(void) { if (fetchifs() == -1) return (-1); return (0); } int kr_init(int fs) { int opt = 0, rcvbuf, default_rcvbuf; socklen_t optlen; unsigned int rtfilter; kr_state.fib_sync = fs; if ((kr_state.fd = socket(AF_ROUTE, SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)) == -1) { log_warn("%s: socket", __func__); return (-1); } /* not interested in my own messages */ if (setsockopt(kr_state.fd, SOL_SOCKET, SO_USELOOPBACK, &opt, sizeof(opt)) == -1) log_warn("%s: setsockopt(SO_USELOOPBACK)", __func__); /* filter out unwanted messages */ rtfilter = ROUTE_FILTER(RTM_ADD) | ROUTE_FILTER(RTM_GET) | ROUTE_FILTER(RTM_CHANGE) | ROUTE_FILTER(RTM_DELETE) | ROUTE_FILTER(RTM_IFINFO) | ROUTE_FILTER(RTM_NEWADDR) | ROUTE_FILTER(RTM_DELADDR) | ROUTE_FILTER(RTM_IFANNOUNCE); if (setsockopt(kr_state.fd, PF_ROUTE, ROUTE_MSGFILTER, &rtfilter, sizeof(rtfilter)) == -1) log_warn("%s: setsockopt(ROUTE_MSGFILTER)", __func__); /* grow receive buffer, don't wanna miss messages */ optlen = sizeof(default_rcvbuf); if (getsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF, &default_rcvbuf, &optlen) == -1) log_warn("%s: getsockopt SOL_SOCKET SO_RCVBUF", __func__); else for (rcvbuf = MAX_RTSOCK_BUF; rcvbuf > default_rcvbuf && setsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) == -1 && errno == ENOBUFS; rcvbuf /= 2) ; /* nothing */ kr_state.pid = getpid(); kr_state.rtseq = 1; if (fetchtable() == -1) return (-1); event_set(&kr_state.ev, kr_state.fd, EV_READ | EV_PERSIST, kr_dispatch_msg, NULL); event_add(&kr_state.ev, NULL); if ((kr_state.ioctl_fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)) == -1) { log_warn("%s: ioctl socket", __func__); return (-1); } return (0); } void kif_redistribute(const char *ifname) { struct kif_node *kif; struct kif_addr 
*ka; RB_FOREACH(kif, kif_tree, &kit) { if (ifname && strcmp(kif->k.ifname, ifname) != 0) continue; TAILQ_FOREACH(ka, &kif->addrs, entry) main_imsg_compose_ldpe(IMSG_NEWADDR, 0, &ka->a, sizeof(ka->a)); } } int kr_change(struct kroute *kr) { struct kroute_prefix *kp; struct kroute_priority *kprio; struct kroute_node *kn; int action = RTM_ADD; kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen); if (kp == NULL) goto miss; kprio = kroute_find_prio(kp, kr->priority); if (kprio == NULL) goto miss; kn = kroute_find_gw(kprio, &kr->nexthop); if (kn == NULL) goto miss; if (kn->r.flags & F_LDPD_INSERTED) action = RTM_CHANGE; kn->r.local_label = kr->local_label; kn->r.remote_label = kr->remote_label; kn->r.flags = kn->r.flags | F_LDPD_INSERTED; /* send update */ if (send_rtmsg(kr_state.fd, action, &kn->r, AF_MPLS) == -1) return (-1); if (ldp_addrisset(kn->r.af, &kn->r.nexthop) && kn->r.remote_label != NO_LABEL) { if (send_rtmsg(kr_state.fd, RTM_CHANGE, &kn->r, AF_INET) == -1) return (-1); } return (0); miss: log_warnx("%s: lost FEC %s/%d nexthop %s", __func__, log_addr(kr->af, &kr->prefix), kr->prefixlen, log_addr(kr->af, &kr->nexthop)); return (-1); } int kr_delete(struct kroute *kr) { struct kroute_prefix *kp; struct kroute_priority *kprio; struct kroute_node *kn; int update = 0; kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen); if (kp == NULL) return (0); kprio = kroute_find_prio(kp, kr->priority); if (kprio == NULL) return (0); kn = kroute_find_gw(kprio, &kr->nexthop); if (kn == NULL) return (0); if (!(kn->r.flags & F_LDPD_INSERTED)) return (0); if (ldp_addrisset(kn->r.af, &kn->r.nexthop) && kn->r.remote_label != NO_LABEL) update = 1; /* kill MPLS LSP */ if (send_rtmsg(kr_state.fd, RTM_DELETE, &kn->r, AF_MPLS) == -1) return (-1); kn->r.flags &= ~F_LDPD_INSERTED; kn->r.local_label = NO_LABEL; kn->r.remote_label = NO_LABEL; if (update && send_rtmsg(kr_state.fd, RTM_CHANGE, &kn->r, AF_INET) == -1) return (-1); return (0); } void kr_shutdown(void) { 
kr_fib_decouple(); kroute_clear(); kif_clear(); } void kr_fib_couple(void) { struct kroute_prefix *kp; struct kroute_priority *kprio; struct kroute_node *kn; struct kif_node *kif; if (kr_state.fib_sync == 1) /* already coupled */ return; kr_state.fib_sync = 1; RB_FOREACH(kp, kroute_tree, &krt) { kprio = TAILQ_FIRST(&kp->priorities); if (kprio == NULL) continue; TAILQ_FOREACH(kn, &kprio->nexthops, entry) { if (!(kn->r.flags & F_LDPD_INSERTED)) continue; send_rtmsg(kr_state.fd, RTM_ADD, &kn->r, AF_MPLS); if (ldp_addrisset(kn->r.af, &kn->r.nexthop) && kn->r.remote_label != NO_LABEL) { send_rtmsg(kr_state.fd, RTM_CHANGE, &kn->r, AF_INET); } } } RB_FOREACH(kif, kif_tree, &kit) if (kif->kpw) kmpw_install(kif->k.ifname, kif->kpw); log_info("kernel routing table coupled"); } void kr_fib_decouple(void) { struct kroute_prefix *kp; struct kroute_priority *kprio; struct kroute_node *kn; uint32_t rl; struct kif_node *kif; if (kr_state.fib_sync == 0) /* already decoupled */ return; RB_FOREACH(kp, kroute_tree, &krt) { kprio = TAILQ_FIRST(&kp->priorities); if (kprio == NULL) continue; TAILQ_FOREACH(kn, &kprio->nexthops, entry) { if (!(kn->r.flags & F_LDPD_INSERTED)) continue; send_rtmsg(kr_state.fd, RTM_DELETE, &kn->r, AF_MPLS); if (ldp_addrisset(kn->r.af, &kn->r.nexthop) && kn->r.remote_label != NO_LABEL) { rl = kn->r.remote_label; kn->r.remote_label = NO_LABEL; send_rtmsg(kr_state.fd, RTM_CHANGE, &kn->r, AF_INET); kn->r.remote_label = rl; } } } RB_FOREACH(kif, kif_tree, &kit) if (kif->kpw) kmpw_uninstall(kif->k.ifname); kr_state.fib_sync = 0; log_info("kernel routing table decoupled"); } void kr_change_egress_label(int af, int was_implicit) { struct kroute_prefix *kp; struct kroute_priority *kprio; struct kroute_node *kn; RB_FOREACH(kp, kroute_tree, &krt) { if (kp->af != af) continue; TAILQ_FOREACH(kprio, &kp->priorities, entry) { TAILQ_FOREACH(kn, &kprio->nexthops, entry) { if (kn->r.local_label > MPLS_LABEL_RESERVED_MAX) continue; if (!was_implicit) { kn->r.local_label = 
MPLS_LABEL_IMPLNULL; continue; } switch (kn->r.af) { case AF_INET: kn->r.local_label = MPLS_LABEL_IPV4NULL; break; case AF_INET6: kn->r.local_label = MPLS_LABEL_IPV6NULL; break; default: break; } } } } } /* ARGSUSED */ static void kr_dispatch_msg(int fd, short event, void *bula) { if (dispatch_rtmsg() == -1) event_loopexit(NULL); } void kr_show_route(struct imsg *imsg) { struct kroute_prefix *kp; struct kroute_priority *kprio; struct kroute_node *kn; int flags; struct kroute kr; switch (imsg->hdr.type) { case IMSG_CTL_KROUTE: if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(flags)) { log_warnx("%s: wrong imsg len", __func__); return; } memcpy(&flags, imsg->data, sizeof(flags)); RB_FOREACH(kp, kroute_tree, &krt) TAILQ_FOREACH(kprio, &kp->priorities, entry) TAILQ_FOREACH(kn, &kprio->nexthops, entry) { if (flags && !(kn->r.flags & flags)) continue; main_imsg_compose_ldpe(IMSG_CTL_KROUTE, imsg->hdr.pid, &kn->r, sizeof(kn->r)); } break; case IMSG_CTL_KROUTE_ADDR: if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(kr)) { log_warnx("%s: wrong imsg len", __func__); return; } memcpy(&kr, imsg->data, sizeof(kr)); kprio = kroute_match(kr.af, &kr.prefix); if (kprio == NULL) break; TAILQ_FOREACH(kn, &kprio->nexthops, entry) main_imsg_compose_ldpe(IMSG_CTL_KROUTE, imsg->hdr.pid, &kn->r, sizeof(kn->r)); break; default: log_debug("%s: error handling imsg", __func__); break; } main_imsg_compose_ldpe(IMSG_CTL_END, imsg->hdr.pid, NULL, 0); } void kr_ifinfo(char *ifname, pid_t pid) { struct kif_node *kif; RB_FOREACH(kif, kif_tree, &kit) if (ifname == NULL || !strcmp(ifname, kif->k.ifname)) { main_imsg_compose_ldpe(IMSG_CTL_IFINFO, pid, &kif->k, sizeof(kif->k)); } main_imsg_compose_ldpe(IMSG_CTL_END, pid, NULL, 0); } static void kr_redist_remove(struct kroute *kr) { /* was the route redistributed? 
*/ if ((kr->flags & F_REDISTRIBUTED) == 0) return; /* remove redistributed flag */ kr->flags &= ~F_REDISTRIBUTED; main_imsg_compose_lde(IMSG_NETWORK_DEL, 0, kr, sizeof(*kr)); } static int kr_redist_eval(struct kroute *kr) { /* was the route redistributed? */ if (kr->flags & F_REDISTRIBUTED) goto dont_redistribute; /* Dynamic routes are not redistributable. */ if (kr->flags & F_DYNAMIC) goto dont_redistribute; /* filter-out non-redistributable addresses */ if (bad_addr(kr->af, &kr->prefix) || (kr->af == AF_INET6 && IN6_IS_SCOPE_EMBED(&kr->prefix.v6))) goto dont_redistribute; /* do not redistribute the default route */ if (kr->prefixlen == 0) goto dont_redistribute; /* * Consider networks with nexthop loopback as not redistributable * unless it is a reject or blackhole route. */ switch (kr->af) { case AF_INET: if (kr->nexthop.v4.s_addr == htonl(INADDR_LOOPBACK) && !(kr->flags & (F_BLACKHOLE|F_REJECT))) goto dont_redistribute; break; case AF_INET6: if (IN6_IS_ADDR_LOOPBACK(&kr->nexthop.v6) && !(kr->flags & (F_BLACKHOLE|F_REJECT))) goto dont_redistribute; break; default: log_debug("%s: unexpected address-family", __func__); break; } /* prefix should be redistributed */ kr->flags |= F_REDISTRIBUTED; main_imsg_compose_lde(IMSG_NETWORK_ADD, 0, kr, sizeof(*kr)); return (1); dont_redistribute: return (0); } static void kr_redistribute(struct kroute_prefix *kp) { struct kroute_priority *kprio; struct kroute_node *kn; TAILQ_FOREACH_REVERSE(kprio, &kp->priorities, plist, entry) { if (kprio == TAILQ_FIRST(&kp->priorities)) { TAILQ_FOREACH(kn, &kprio->nexthops, entry) kr_redist_eval(&kn->r); } else { TAILQ_FOREACH(kn, &kprio->nexthops, entry) kr_redist_remove(&kn->r); } } } /* rb-tree compare */ static __inline int kroute_compare(struct kroute_prefix *a, struct kroute_prefix *b) { int addrcmp; if (a->af < b->af) return (-1); if (a->af > b->af) return (1); addrcmp = ldp_addrcmp(a->af, &a->prefix, &b->prefix); if (addrcmp != 0) return (addrcmp); if (a->prefixlen < b->prefixlen) 
return (-1); if (a->prefixlen > b->prefixlen) return (1); return (0); } /* tree management */ static struct kroute_prefix * kroute_find_prefix(int af, union ldpd_addr *prefix, uint8_t prefixlen) { struct kroute_prefix s; s.af = af; s.prefix = *prefix; s.prefixlen = prefixlen; return (RB_FIND(kroute_tree, &krt, &s)); } static struct kroute_priority * kroute_find_prio(struct kroute_prefix *kp, uint8_t prio) { struct kroute_priority *kprio; /* RTP_ANY here picks the lowest priority node */ if (prio == RTP_ANY) return (TAILQ_FIRST(&kp->priorities)); TAILQ_FOREACH(kprio, &kp->priorities, entry) if (kprio->priority == prio) return (kprio); return (NULL); } static struct kroute_node * kroute_find_gw(struct kroute_priority *kprio, union ldpd_addr *nh) { struct kroute_node *kn; TAILQ_FOREACH(kn, &kprio->nexthops, entry) if (ldp_addrcmp(kprio->kp->af, &kn->r.nexthop, nh) == 0) return (kn); return (NULL); } static int kroute_insert(struct kroute *kr) { struct kroute_prefix *kp; struct kroute_priority *kprio, *tmp; struct kroute_node *kn; kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen); if (kp == NULL) { kp = calloc(1, sizeof((*kp))); if (kp == NULL) fatal(__func__); kp->af = kr->af; kp->prefix = kr->prefix; kp->prefixlen = kr->prefixlen; TAILQ_INIT(&kp->priorities); RB_INSERT(kroute_tree, &krt, kp); } kprio = kroute_find_prio(kp, kr->priority); if (kprio == NULL) { kprio = calloc(1, sizeof(*kprio)); if (kprio == NULL) fatal(__func__); kprio->kp = kp; kprio->priority = kr->priority; TAILQ_INIT(&kprio->nexthops); /* lower priorities first */ TAILQ_FOREACH(tmp, &kp->priorities, entry) if (tmp->priority > kprio->priority) break; if (tmp) TAILQ_INSERT_BEFORE(tmp, kprio, entry); else TAILQ_INSERT_TAIL(&kp->priorities, kprio, entry); } kn = kroute_find_gw(kprio, &kr->nexthop); if (kn == NULL) { kn = calloc(1, sizeof(*kn)); if (kn == NULL) fatal(__func__); kn->kprio = kprio; kn->r = *kr; TAILQ_INSERT_TAIL(&kprio->nexthops, kn, entry); } kr_redistribute(kp); return (0); } 
static int kroute_uninstall(struct kroute_node *kn) { /* kill MPLS LSP if one was installed */ if (kn->r.flags & F_LDPD_INSERTED) if (send_rtmsg(kr_state.fd, RTM_DELETE, &kn->r, AF_MPLS) == -1) return (-1); return (0); } static int kroute_remove(struct kroute *kr) { struct kroute_prefix *kp; struct kroute_priority *kprio; struct kroute_node *kn; kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen); if (kp == NULL) goto notfound; kprio = kroute_find_prio(kp, kr->priority); if (kprio == NULL) goto notfound; kn = kroute_find_gw(kprio, &kr->nexthop); if (kn == NULL) goto notfound; kr_redist_remove(&kn->r); kroute_uninstall(kn); TAILQ_REMOVE(&kprio->nexthops, kn, entry); free(kn); if (TAILQ_EMPTY(&kprio->nexthops)) { TAILQ_REMOVE(&kp->priorities, kprio, entry); free(kprio); } if (TAILQ_EMPTY(&kp->priorities)) { if (RB_REMOVE(kroute_tree, &krt, kp) == NULL) { log_warnx("%s failed for %s/%u", __func__, log_addr(kr->af, &kr->prefix), kp->prefixlen); return (-1); } free(kp); } else kr_redistribute(kp); return (0); notfound: log_warnx("%s failed to find %s/%u", __func__, log_addr(kr->af, &kr->prefix), kr->prefixlen); return (-1); } static void kroute_clear(void) { struct kroute_prefix *kp; struct kroute_priority *kprio; struct kroute_node *kn; while ((kp = RB_MIN(kroute_tree, &krt)) != NULL) { while ((kprio = TAILQ_FIRST(&kp->priorities)) != NULL) { while ((kn = TAILQ_FIRST(&kprio->nexthops)) != NULL) { kr_redist_remove(&kn->r); kroute_uninstall(kn); TAILQ_REMOVE(&kprio->nexthops, kn, entry); free(kn); } TAILQ_REMOVE(&kp->priorities, kprio, entry); free(kprio); } RB_REMOVE(kroute_tree, &krt, kp); free(kp); } } static __inline int kif_compare(struct kif_node *a, struct kif_node *b) { return (b->k.ifindex - a->k.ifindex); } /* tree management */ static struct kif_node * kif_find(unsigned short ifindex) { struct kif_node s; memset(&s, 0, sizeof(s)); s.k.ifindex = ifindex; return (RB_FIND(kif_tree, &kit, &s)); } struct kif * kif_findname(char *ifname) { struct kif_node 
*kif; RB_FOREACH(kif, kif_tree, &kit) if (!strcmp(ifname, kif->k.ifname)) return (&kif->k); return (NULL); } static struct kif_node * kif_insert(unsigned short ifindex) { struct kif_node *kif; if ((kif = calloc(1, sizeof(struct kif_node))) == NULL) return (NULL); kif->k.ifindex = ifindex; TAILQ_INIT(&kif->addrs); if (RB_INSERT(kif_tree, &kit, kif) != NULL) fatalx("kif_insert: RB_INSERT"); return (kif); } static int kif_remove(struct kif_node *kif) { struct kif_addr *ka; if (RB_REMOVE(kif_tree, &kit, kif) == NULL) { log_warnx("RB_REMOVE(kif_tree, &kit, kif)"); return (-1); } while ((ka = TAILQ_FIRST(&kif->addrs)) != NULL) { main_imsg_compose_ldpe(IMSG_DELADDR, 0, &ka->a, sizeof(ka->a)); TAILQ_REMOVE(&kif->addrs, ka, entry); free(ka); } free(kif); return (0); } void kif_clear(void) { struct kif_node *kif; while ((kif = RB_MIN(kif_tree, &kit)) != NULL) kif_remove(kif); } static struct kif_node * kif_update(unsigned short ifindex, int flags, struct if_data *ifd, struct sockaddr_dl *sdl, int *link_old) { struct kif_node *kif; if ((kif = kif_find(ifindex)) == NULL) { if ((kif = kif_insert(ifindex)) == NULL) return (NULL); } else *link_old = (kif->k.flags & IFF_UP) && LINK_STATE_IS_UP(kif->k.link_state); kif->k.flags = flags; kif->k.link_state = ifd->ifi_link_state; kif->k.if_type = ifd->ifi_type; kif->k.baudrate = ifd->ifi_baudrate; kif->k.mtu = ifd->ifi_mtu; if (sdl && sdl->sdl_family == AF_LINK) { if (sdl->sdl_nlen >= sizeof(kif->k.ifname)) memcpy(kif->k.ifname, sdl->sdl_data, sizeof(kif->k.ifname) - 1); else if (sdl->sdl_nlen > 0) memcpy(kif->k.ifname, sdl->sdl_data, sdl->sdl_nlen); /* string already terminated via calloc() */ } return (kif); } static struct kroute_priority * kroute_match(int af, union ldpd_addr *key) { int i, maxprefixlen; struct kroute_prefix *kp; struct kroute_priority *kprio; union ldpd_addr addr; switch (af) { case AF_INET: maxprefixlen = 32; break; case AF_INET6: maxprefixlen = 128; break; default: log_warnx("%s: unknown af", __func__); return 
(NULL); } for (i = maxprefixlen; i >= 0; i--) { ldp_applymask(af, &addr, key, i); kp = kroute_find_prefix(af, &addr, i); if (kp == NULL) continue; kprio = kroute_find_prio(kp, RTP_ANY); if (kprio != NULL) return (kprio); } return (NULL); } /* misc */ static uint8_t prefixlen_classful(in_addr_t ina) { /* it hurt to write this. */ if (ina >= 0xf0000000U) /* class E */ return (32); else if (ina >= 0xe0000000U) /* class D */ return (4); else if (ina >= 0xc0000000U) /* class C */ return (24); else if (ina >= 0x80000000U) /* class B */ return (16); else /* class A */ return (8); } #define ROUNDUP(a) \ (((a) & (sizeof(long) - 1)) ? (1 + ((a) | (sizeof(long) - 1))) : (a)) static void get_rtaddrs(int addrs, struct sockaddr *sa, struct sockaddr **rti_info) { int i; for (i = 0; i < RTAX_MAX; i++) { if (addrs & (1 << i)) { rti_info[i] = sa; sa = (struct sockaddr *)((char *)(sa) + ROUNDUP(sa->sa_len)); } else rti_info[i] = NULL; } } static void if_change(unsigned short ifindex, int flags, struct if_data *ifd, struct sockaddr_dl *sdl) { struct kif_node *kif; struct kif_addr *ka; int link_old = 0, link_new; kif = kif_update(ifindex, flags, ifd, sdl, &link_old); if (!kif) { log_warn("%s: kif_update(%u)", __func__, ifindex); return; } link_new = (kif->k.flags & IFF_UP) && LINK_STATE_IS_UP(kif->k.link_state); if (link_new == link_old) return; main_imsg_compose_ldpe(IMSG_IFSTATUS, 0, &kif->k, sizeof(struct kif)); if (link_new) { TAILQ_FOREACH(ka, &kif->addrs, entry) main_imsg_compose_ldpe(IMSG_NEWADDR, 0, &ka->a, sizeof(ka->a)); } else { TAILQ_FOREACH(ka, &kif->addrs, entry) main_imsg_compose_ldpe(IMSG_DELADDR, 0, &ka->a, sizeof(ka->a)); } } static void if_newaddr(unsigned short ifindex, struct sockaddr *ifa, struct sockaddr *mask, struct sockaddr *brd) { struct kif_node *kif; struct sockaddr_in *ifa4, *mask4, *brd4; struct sockaddr_in6 *ifa6, *mask6, *brd6; struct kif_addr *ka; if (ifa == NULL) return; if ((kif = kif_find(ifindex)) == NULL) { log_warnx("%s: corresponding if %d not 
found", __func__, ifindex); return; } switch (ifa->sa_family) { case AF_INET: ifa4 = (struct sockaddr_in *) ifa; mask4 = (struct sockaddr_in *) mask; brd4 = (struct sockaddr_in *) brd; /* filter out unwanted addresses */ if (bad_addr_v4(ifa4->sin_addr)) return; if ((ka = calloc(1, sizeof(struct kif_addr))) == NULL) fatal("if_newaddr"); ka->a.addr.v4 = ifa4->sin_addr; if (mask4) ka->a.prefixlen = mask2prefixlen(mask4->sin_addr.s_addr); if (brd4) ka->a.dstbrd.v4 = brd4->sin_addr; break; case AF_INET6: ifa6 = (struct sockaddr_in6 *) ifa; mask6 = (struct sockaddr_in6 *) mask; brd6 = (struct sockaddr_in6 *) brd; /* We only care about link-local and global-scope. */ if (bad_addr_v6(&ifa6->sin6_addr)) return; clearscope(&ifa6->sin6_addr); if ((ka = calloc(1, sizeof(struct kif_addr))) == NULL) fatal("if_newaddr"); ka->a.addr.v6 = ifa6->sin6_addr; if (mask6) ka->a.prefixlen = mask2prefixlen6(mask6); if (brd6) ka->a.dstbrd.v6 = brd6->sin6_addr; break; default: return; } ka->a.ifindex = ifindex; ka->a.af = ifa->sa_family; TAILQ_INSERT_TAIL(&kif->addrs, ka, entry); /* notify ldpe about new address */ main_imsg_compose_ldpe(IMSG_NEWADDR, 0, &ka->a, sizeof(ka->a)); } static void if_deladdr(unsigned short ifindex, struct sockaddr *ifa, struct sockaddr *mask, struct sockaddr *brd) { struct kif_node *kif; struct sockaddr_in *ifa4, *mask4, *brd4; struct sockaddr_in6 *ifa6, *mask6, *brd6; struct kaddr k; struct kif_addr *ka, *nka; if (ifa == NULL) return; if ((kif = kif_find(ifindex)) == NULL) { log_warnx("%s: corresponding if %d not found", __func__, ifindex); return; } memset(&k, 0, sizeof(k)); k.af = ifa->sa_family; switch (ifa->sa_family) { case AF_INET: ifa4 = (struct sockaddr_in *) ifa; mask4 = (struct sockaddr_in *) mask; brd4 = (struct sockaddr_in *) brd; /* filter out unwanted addresses */ if (bad_addr_v4(ifa4->sin_addr)) return; k.addr.v4 = ifa4->sin_addr; if (mask4) k.prefixlen = mask2prefixlen(mask4->sin_addr.s_addr); if (brd4) k.dstbrd.v4 = brd4->sin_addr; break; case 
AF_INET6: ifa6 = (struct sockaddr_in6 *) ifa; mask6 = (struct sockaddr_in6 *) mask; brd6 = (struct sockaddr_in6 *) brd; /* We only care about link-local and global-scope. */ if (bad_addr_v6(&ifa6->sin6_addr)) return; clearscope(&ifa6->sin6_addr); k.addr.v6 = ifa6->sin6_addr; if (mask6) k.prefixlen = mask2prefixlen6(mask6); if (brd6) k.dstbrd.v6 = brd6->sin6_addr; break; default: return; } for (ka = TAILQ_FIRST(&kif->addrs); ka != NULL; ka = nka) { nka = TAILQ_NEXT(ka, entry); if (ka->a.af != k.af || ka->a.prefixlen != k.prefixlen || ldp_addrcmp(ka->a.af, &ka->a.addr, &k.addr)) continue; /* notify ldpe about removed address */ main_imsg_compose_ldpe(IMSG_DELADDR, 0, &ka->a, sizeof(ka->a)); TAILQ_REMOVE(&kif->addrs, ka, entry); free(ka); return; } } static void if_announce(void *msg) { struct if_announcemsghdr *ifan; struct kif_node *kif; ifan = msg; switch (ifan->ifan_what) { case IFAN_ARRIVAL: kif = kif_insert(ifan->ifan_index); if (kif) strlcpy(kif->k.ifname, ifan->ifan_name, sizeof(kif->k.ifname)); break; case IFAN_DEPARTURE: kif = kif_find(ifan->ifan_index); if (kif) kif_remove(kif); break; } } /* rtsock */ static int send_rtmsg(int fd, int action, struct kroute *kr, int family) { switch (kr->af) { case AF_INET: return (send_rtmsg_v4(fd, action, kr, family)); case AF_INET6: return (send_rtmsg_v6(fd, action, kr, family)); default: fatalx("send_rtmsg: unknown af"); } } static int send_rtmsg_v4(int fd, int action, struct kroute *kr, int family) { struct iovec iov[5]; struct rt_msghdr hdr; struct sockaddr_mpls label_in, label_out; struct sockaddr_in dst, mask, nexthop; int iovcnt = 0; if (kr_state.fib_sync == 0) return (0); /* * Reserved labels (implicit and explicit NULL) should not be added * to the FIB. 
*/ if (family == AF_MPLS && kr->local_label < MPLS_LABEL_RESERVED_MAX) return (0); /* initialize header */ memset(&hdr, 0, sizeof(hdr)); hdr.rtm_version = RTM_VERSION; hdr.rtm_type = action; hdr.rtm_flags = RTF_UP; hdr.rtm_fmask = RTF_MPLS; hdr.rtm_seq = kr_state.rtseq++; /* overflow doesn't matter */ hdr.rtm_msglen = sizeof(hdr); hdr.rtm_hdrlen = sizeof(struct rt_msghdr); hdr.rtm_priority = kr->priority; /* adjust iovec */ iov[iovcnt].iov_base = &hdr; iov[iovcnt++].iov_len = sizeof(hdr); if (family == AF_MPLS) { memset(&label_in, 0, sizeof(label_in)); label_in.smpls_len = sizeof(label_in); label_in.smpls_family = AF_MPLS; label_in.smpls_label = htonl(kr->local_label << MPLS_LABEL_OFFSET); /* adjust header */ hdr.rtm_flags |= RTF_MPLS | RTF_MPATH; hdr.rtm_addrs |= RTA_DST; hdr.rtm_msglen += sizeof(label_in); /* adjust iovec */ iov[iovcnt].iov_base = &label_in; iov[iovcnt++].iov_len = sizeof(label_in); } else { memset(&dst, 0, sizeof(dst)); dst.sin_len = sizeof(dst); dst.sin_family = AF_INET; dst.sin_addr = kr->prefix.v4; /* adjust header */ hdr.rtm_addrs |= RTA_DST; hdr.rtm_msglen += sizeof(dst); /* adjust iovec */ iov[iovcnt].iov_base = &dst; iov[iovcnt++].iov_len = sizeof(dst); } memset(&nexthop, 0, sizeof(nexthop)); nexthop.sin_len = sizeof(nexthop); nexthop.sin_family = AF_INET; nexthop.sin_addr = kr->nexthop.v4; /* adjust header */ hdr.rtm_flags |= RTF_GATEWAY; hdr.rtm_addrs |= RTA_GATEWAY; hdr.rtm_msglen += sizeof(nexthop); /* adjust iovec */ iov[iovcnt].iov_base = &nexthop; iov[iovcnt++].iov_len = sizeof(nexthop); if (family == AF_INET) { memset(&mask, 0, sizeof(mask)); mask.sin_len = sizeof(mask); mask.sin_family = AF_INET; mask.sin_addr.s_addr = prefixlen2mask(kr->prefixlen); /* adjust header */ hdr.rtm_addrs |= RTA_NETMASK; hdr.rtm_msglen += sizeof(mask); /* adjust iovec */ iov[iovcnt].iov_base = &mask; iov[iovcnt++].iov_len = sizeof(mask); } /* If action is RTM_DELETE we have to get rid of MPLS infos */ if (kr->remote_label != NO_LABEL && action != 
RTM_DELETE) { memset(&label_out, 0, sizeof(label_out)); label_out.smpls_len = sizeof(label_out); label_out.smpls_family = AF_MPLS; label_out.smpls_label = htonl(kr->remote_label << MPLS_LABEL_OFFSET); /* adjust header */ hdr.rtm_addrs |= RTA_SRC; hdr.rtm_flags |= RTF_MPLS; hdr.rtm_msglen += sizeof(label_out); /* adjust iovec */ iov[iovcnt].iov_base = &label_out; iov[iovcnt++].iov_len = sizeof(label_out); if (kr->remote_label == MPLS_LABEL_IMPLNULL) { if (family == AF_MPLS) hdr.rtm_mpls = MPLS_OP_POP; else return (0); } else { if (family == AF_MPLS) hdr.rtm_mpls = MPLS_OP_SWAP; else hdr.rtm_mpls = MPLS_OP_PUSH; } } retry: if (writev(fd, iov, iovcnt) == -1) { if (errno == ESRCH) { if (hdr.rtm_type == RTM_CHANGE && family == AF_MPLS) { hdr.rtm_type = RTM_ADD; goto retry; } else if (hdr.rtm_type == RTM_DELETE) { log_info("route %s/%u vanished before delete", inet_ntoa(kr->prefix.v4), kr->prefixlen); return (-1); } } log_warn("%s action %u, af %s, prefix %s/%u", __func__, hdr.rtm_type, af_name(family), inet_ntoa(kr->prefix.v4), kr->prefixlen); return (-1); } return (0); } static int send_rtmsg_v6(int fd, int action, struct kroute *kr, int family) { return (0); } static int fetchtable(void) { size_t len; int mib[7]; char *buf; int rv; mib[0] = CTL_NET; mib[1] = PF_ROUTE; mib[2] = 0; mib[3] = 0; mib[4] = NET_RT_DUMP; mib[5] = 0; mib[6] = 0; /* rtableid */ if (sysctl(mib, 7, NULL, &len, NULL, 0) == -1) { log_warn("sysctl"); return (-1); } if ((buf = malloc(len)) == NULL) { log_warn(__func__); return (-1); } if (sysctl(mib, 7, buf, &len, NULL, 0) == -1) { log_warn("sysctl"); free(buf); return (-1); } rv = rtmsg_process(buf, len); free(buf); return (rv); } static int fetchifs(void) { size_t len; int mib[6]; char *buf; int rv; mib[0] = CTL_NET; mib[1] = PF_ROUTE; mib[2] = 0; mib[3] = 0; /* wildcard */ mib[4] = NET_RT_IFLIST; mib[5] = 0; if (sysctl(mib, 6, NULL, &len, NULL, 0) == -1) { log_warn("sysctl"); return (-1); } if ((buf = malloc(len)) == NULL) { log_warn(__func__); 
return (-1); } if (sysctl(mib, 6, buf, &len, NULL, 0) == -1) { log_warn("sysctl"); free(buf); return (-1); } rv = rtmsg_process(buf, len); free(buf); return (rv); } static int dispatch_rtmsg(void) { char buf[RT_BUF_SIZE]; ssize_t n; if ((n = read(kr_state.fd, &buf, sizeof(buf))) == -1) { if (errno == EAGAIN || errno == EINTR) return (0); log_warn("%s: read error", __func__); return (-1); } if (n == 0) { log_warnx("routing socket closed"); return (-1); } return (rtmsg_process(buf, n)); } static int rtmsg_process(char *buf, size_t len) { struct rt_msghdr *rtm; struct if_msghdr ifm; struct ifa_msghdr *ifam; struct sockaddr *sa, *rti_info[RTAX_MAX]; size_t offset; char *next; for (offset = 0; offset < len; offset += rtm->rtm_msglen) { next = buf + offset; rtm = (struct rt_msghdr *)next; if (len < offset + sizeof(unsigned short) || len < offset + rtm->rtm_msglen) fatalx("rtmsg_process: partial rtm in buffer"); if (rtm->rtm_version != RTM_VERSION) continue; log_rtmsg(rtm->rtm_type); sa = (struct sockaddr *)(next + rtm->rtm_hdrlen); get_rtaddrs(rtm->rtm_addrs, sa, rti_info); switch (rtm->rtm_type) { case RTM_ADD: case RTM_GET: case RTM_CHANGE: case RTM_DELETE: if (rtm->rtm_errno) /* failed attempts... */ continue; if (rtm->rtm_tableid != 0) continue; if (rtm->rtm_type == RTM_GET && rtm->rtm_pid != kr_state.pid) continue; /* Skip ARP/ND cache and broadcast routes. 
*/ if (rtm->rtm_flags & (RTF_LLINFO|RTF_BROADCAST)) continue; /* LDP should follow the IGP and ignore BGP routes */ if (rtm->rtm_priority == RTP_BGP) continue; if (rtmsg_process_route(rtm, rti_info) == -1) return (-1); } switch (rtm->rtm_type) { case RTM_IFINFO: memcpy(&ifm, next, sizeof(ifm)); if_change(ifm.ifm_index, ifm.ifm_flags, &ifm.ifm_data, (struct sockaddr_dl *)rti_info[RTAX_IFP]); break; case RTM_NEWADDR: ifam = (struct ifa_msghdr *)rtm; if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA | RTA_BRD)) == 0) break; if_newaddr(ifam->ifam_index, (struct sockaddr *)rti_info[RTAX_IFA], (struct sockaddr *)rti_info[RTAX_NETMASK], (struct sockaddr *)rti_info[RTAX_BRD]); break; case RTM_DELADDR: ifam = (struct ifa_msghdr *)rtm; if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA | RTA_BRD)) == 0) break; if_deladdr(ifam->ifam_index, (struct sockaddr *)rti_info[RTAX_IFA], (struct sockaddr *)rti_info[RTAX_NETMASK], (struct sockaddr *)rti_info[RTAX_BRD]); break; case RTM_IFANNOUNCE: if_announce(next); break; default: /* ignore for now */ break; } } return (offset); } static int rtmsg_process_route(struct rt_msghdr *rtm, struct sockaddr *rti_info[RTAX_MAX]) { struct sockaddr *sa; struct sockaddr_in *sa_in; struct sockaddr_in6 *sa_in6; struct kroute kr; struct kroute_prefix *kp; struct kroute_priority *kprio; struct kroute_node *kn; if ((sa = rti_info[RTAX_DST]) == NULL) return (-1); memset(&kr, 0, sizeof(kr)); kr.af = sa->sa_family; switch (kr.af) { case AF_INET: kr.prefix.v4 = ((struct sockaddr_in *)sa)->sin_addr; sa_in = (struct sockaddr_in *) rti_info[RTAX_NETMASK]; if (sa_in != NULL && sa_in->sin_len != 0) kr.prefixlen = mask2prefixlen(sa_in->sin_addr.s_addr); else if (rtm->rtm_flags & RTF_HOST) kr.prefixlen = 32; else if (kr.prefix.v4.s_addr == INADDR_ANY) kr.prefixlen = 0; else kr.prefixlen = prefixlen_classful(kr.prefix.v4.s_addr); break; case AF_INET6: kr.prefix.v6 = ((struct sockaddr_in6 *)sa)->sin6_addr; sa_in6 = (struct sockaddr_in6 *)rti_info[RTAX_NETMASK]; if (sa_in6 
!= NULL && sa_in6->sin6_len != 0) kr.prefixlen = mask2prefixlen6(sa_in6); else if (rtm->rtm_flags & RTF_HOST) kr.prefixlen = 128; else if (IN6_IS_ADDR_UNSPECIFIED(&kr.prefix.v6)) kr.prefixlen = 0; else fatalx("in6 net addr without netmask"); break; default: return (0); } kr.ifindex = rtm->rtm_index; if ((sa = rti_info[RTAX_GATEWAY]) != NULL) { switch (sa->sa_family) { case AF_INET: kr.nexthop.v4 = ((struct sockaddr_in *)sa)->sin_addr; break; case AF_INET6: sa_in6 = (struct sockaddr_in6 *)sa; recoverscope(sa_in6); kr.nexthop.v6 = sa_in6->sin6_addr; if (sa_in6->sin6_scope_id) kr.ifindex = sa_in6->sin6_scope_id; break; case AF_LINK: kr.flags |= F_CONNECTED; break; } } if (rtm->rtm_flags & RTF_STATIC) kr.flags |= F_STATIC; if (rtm->rtm_flags & RTF_BLACKHOLE) kr.flags |= F_BLACKHOLE; if (rtm->rtm_flags & RTF_REJECT) kr.flags |= F_REJECT; if (rtm->rtm_flags & RTF_DYNAMIC) kr.flags |= F_DYNAMIC; /* routes attached to connected or loopback interfaces */ if (rtm->rtm_flags & RTF_CONNECTED || ldp_addrcmp(kr.af, &kr.prefix, &kr.nexthop) == 0) kr.flags |= F_CONNECTED; kr.priority = rtm->rtm_priority; if (rtm->rtm_type == RTM_CHANGE) { /* * The kernel doesn't allow RTM_CHANGE for multipath routes. * If we got this message we know that the route has only one * nexthop and we should remove it before installing the same * route with the new nexthop. 
*/ kp = kroute_find_prefix(kr.af, &kr.prefix, kr.prefixlen); if (kp) { kprio = kroute_find_prio(kp, kr.priority); if (kprio) { kn = TAILQ_FIRST(&kprio->nexthops); if (kn) kroute_remove(&kn->r); } } } kn = NULL; kp = kroute_find_prefix(kr.af, &kr.prefix, kr.prefixlen); if (kp) { kprio = kroute_find_prio(kp, kr.priority); if (kprio) kn = kroute_find_gw(kprio, &kr.nexthop); } if (rtm->rtm_type == RTM_DELETE) { if (kn == NULL) return (0); return (kroute_remove(&kr)); } if (!ldp_addrisset(kr.af, &kr.nexthop) && !(kr.flags & F_CONNECTED)) { log_warnx("%s: no nexthop for %s/%u", __func__, log_addr(kr.af, &kr.prefix), kr.prefixlen); return (-1); } if (kn != NULL) { /* update route */ kn->r = kr; kr_redistribute(kp); } else { kr.local_label = NO_LABEL; kr.remote_label = NO_LABEL; kroute_insert(&kr); } return (0); } int kmpw_set(struct kpw *kpw) { struct kif_node *kif; kif = kif_find(kpw->ifindex); if (kif == NULL) { log_warnx("%s: failed to find mpw by index (%u)", __func__, kpw->ifindex); return (-1); } if (kif->kpw == NULL) kif->kpw = malloc(sizeof(*kif->kpw)); *kif->kpw = *kpw; return (kmpw_install(kif->k.ifname, kpw)); } int kmpw_unset(struct kpw *kpw) { struct kif_node *kif; kif = kif_find(kpw->ifindex); if (kif == NULL) { log_warnx("%s: failed to find mpw by index (%u)", __func__, kpw->ifindex); return (-1); } if (kif->kpw == NULL) { log_warnx("%s: %s is not set", __func__, kif->k.ifname); return (-1); } free(kif->kpw); kif->kpw = NULL; return (kmpw_uninstall(kif->k.ifname)); } static int kmpw_install(const char *ifname, struct kpw *kpw) { struct ifreq ifr; struct ifmpwreq imr; memset(&imr, 0, sizeof(imr)); switch (kpw->pw_type) { case PW_TYPE_ETHERNET: imr.imr_type = IMR_TYPE_ETHERNET; break; case PW_TYPE_ETHERNET_TAGGED: imr.imr_type = IMR_TYPE_ETHERNET_TAGGED; break; default: log_warnx("%s: unhandled pseudowire type (%#X)", __func__, kpw->pw_type); return (-1); } if (kpw->flags & F_PW_CWORD) imr.imr_flags |= IMR_FLAG_CONTROLWORD; memcpy(&imr.imr_nexthop, 
addr2sa(kpw->af, &kpw->nexthop, 0), sizeof(imr.imr_nexthop)); imr.imr_lshim.shim_label = kpw->local_label; imr.imr_rshim.shim_label = kpw->remote_label; memset(&ifr, 0, sizeof(ifr)); strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); ifr.ifr_data = (caddr_t) &imr; if (ioctl(kr_state.ioctl_fd, SIOCSETMPWCFG, &ifr)) { log_warn("ioctl SIOCSETMPWCFG"); return (-1); } return (0); } static int kmpw_uninstall(const char *ifname) { struct ifreq ifr; struct ifmpwreq imr; memset(&ifr, 0, sizeof(ifr)); memset(&imr, 0, sizeof(imr)); strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); ifr.ifr_data = (caddr_t) &imr; if (ioctl(kr_state.ioctl_fd, SIOCSETMPWCFG, &ifr)) { log_warn("ioctl SIOCSETMPWCFG"); return (-1); } return (0); }