/* $OpenBSD: if_pflow.c,v 1.106 2023/12/16 22:16:02 mvs Exp $ */ /* * Copyright (c) 2011 Florian Obser * Copyright (c) 2011 Sebastian Benoit * Copyright (c) 2008 Henning Brauer * Copyright (c) 2008 Joerg Goltermann * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bpfilter.h" #include "pflow.h" #define PFLOW_MINMTU \ (sizeof(struct pflow_header) + sizeof(struct pflow_flow)) #ifdef PFLOWDEBUG #define DPRINTF(x) do { printf x ; } while (0) #else #define DPRINTF(x) #endif SMR_SLIST_HEAD(, pflow_softc) pflowif_list; enum pflowstat_counters { pflow_flows, pflow_packets, pflow_onomem, pflow_oerrors, pflow_ncounters, }; struct cpumem *pflow_counters; static inline void pflowstat_inc(enum pflowstat_counters c) { counters_inc(pflow_counters, c); } void pflowattach(int); int pflow_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct rtentry *rt); void pflow_output_process(void *); int pflow_clone_create(struct if_clone *, int); int pflow_clone_destroy(struct ifnet *); int pflow_set(struct pflow_softc *, struct pflowreq *); int pflow_calc_mtu(struct pflow_softc *, int, int); void pflow_setmtu(struct pflow_softc *, int); int pflowvalidsockaddr(const struct sockaddr *, int); int pflowioctl(struct ifnet *, u_long, caddr_t); struct mbuf *pflow_get_mbuf(struct pflow_softc *, u_int16_t); void pflow_flush(struct pflow_softc *); int pflow_sendout_v5(struct pflow_softc *); int pflow_sendout_ipfix(struct pflow_softc *, sa_family_t); int pflow_sendout_ipfix_tmpl(struct pflow_softc *); int pflow_sendout_mbuf(struct pflow_softc *, struct mbuf *); void pflow_timeout(void *); void pflow_timeout6(void *); void pflow_timeout_tmpl(void *); void copy_flow_data(struct pflow_flow *, struct pflow_flow *, struct pf_state *, struct pf_state_key *, int, int); void copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *, struct pflow_ipfix_flow4 *, struct pf_state *, struct pf_state_key *, struct pflow_softc *, int, int); void copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *, struct pflow_ipfix_flow6 *, struct pf_state *, struct pf_state_key *, struct pflow_softc *, int, int); int pflow_pack_flow(struct pf_state *, struct pf_state_key *, struct pflow_softc *); int pflow_pack_flow_ipfix(struct pf_state *, struct pf_state_key *, struct pflow_softc *); int export_pflow_if(struct pf_state*, struct pf_state_key *, struct pflow_softc *); int copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc); int copy_flow_ipfix_4_to_m(struct pflow_ipfix_flow4 *flow, struct pflow_softc *sc); int copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow, struct pflow_softc *sc); struct if_clone pflow_cloner = IF_CLONE_INITIALIZER("pflow", pflow_clone_create, pflow_clone_destroy); void pflowattach(int npflow) { SMR_SLIST_INIT(&pflowif_list); pflow_counters = counters_alloc(pflow_ncounters); if_clone_attach(&pflow_cloner); } int pflow_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct rtentry *rt) { m_freem(m); /* drop packet */ return (EAFNOSUPPORT); } void pflow_output_process(void *arg) { struct mbuf_list ml; struct pflow_softc *sc = arg; struct mbuf *m; mq_delist(&sc->sc_outputqueue, &ml); rw_enter_read(&sc->sc_lock); while ((m = ml_dequeue(&ml)) != NULL) { pflow_sendout_mbuf(sc, m); } rw_exit_read(&sc->sc_lock); } int pflow_clone_create(struct if_clone *ifc, int unit) { struct ifnet *ifp; struct pflow_softc *pflowif; pflowif = malloc(sizeof(*pflowif), M_DEVBUF, M_WAITOK|M_ZERO); rw_init(&pflowif->sc_lock, "pflowlk"); mtx_init(&pflowif->sc_mtx, IPL_MPFLOOR); MGET(pflowif->send_nam, M_WAIT, MT_SONAME); pflowif->sc_version = PFLOW_PROTO_DEFAULT; /* ipfix template init */ bzero(&pflowif->sc_tmpl_ipfix,sizeof(pflowif->sc_tmpl_ipfix)); pflowif->sc_tmpl_ipfix.set_header.set_id = htons(PFLOW_IPFIX_TMPL_SET_ID); pflowif->sc_tmpl_ipfix.set_header.set_length = htons(sizeof(struct pflow_ipfix_tmpl)); /* ipfix IPv4 template */ pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.tmpl_id = htons(PFLOW_IPFIX_TMPL_IPV4_ID); pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.field_count = htons(PFLOW_IPFIX_TMPL_IPV4_FIELD_COUNT); pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.field_id = htons(PFIX_IE_sourceIPv4Address); pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.len = htons(4); pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.field_id = htons(PFIX_IE_destinationIPv4Address); pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.len = htons(4); pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.field_id = htons(PFIX_IE_ingressInterface); pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.len = htons(4); pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.field_id = htons(PFIX_IE_egressInterface); pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.len = htons(4); pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.field_id = htons(PFIX_IE_packetDeltaCount); pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.len = htons(8); pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.field_id = htons(PFIX_IE_octetDeltaCount); pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.len = htons(8); pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.field_id = htons(PFIX_IE_flowStartMilliseconds); pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.len = htons(8); pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.field_id = htons(PFIX_IE_flowEndMilliseconds); pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.len = htons(8); pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.field_id = htons(PFIX_IE_sourceTransportPort); pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.len = htons(2); pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.field_id = htons(PFIX_IE_destinationTransportPort); pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.len = htons(2); pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.field_id = htons(PFIX_IE_ipClassOfService); pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.len = htons(1); pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.field_id = htons(PFIX_IE_protocolIdentifier); pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.len = htons(1); /* ipfix IPv6 template */ pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.tmpl_id = htons(PFLOW_IPFIX_TMPL_IPV6_ID); pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.field_count = htons(PFLOW_IPFIX_TMPL_IPV6_FIELD_COUNT); pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.field_id = htons(PFIX_IE_sourceIPv6Address); pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.len = htons(16); pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.field_id = htons(PFIX_IE_destinationIPv6Address); pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.len = htons(16); pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.field_id = htons(PFIX_IE_ingressInterface); pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.len = htons(4); pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.field_id = htons(PFIX_IE_egressInterface); pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.len = htons(4); pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.field_id = htons(PFIX_IE_packetDeltaCount); pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.len = htons(8); pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.field_id = htons(PFIX_IE_octetDeltaCount); pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.len = htons(8); pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.field_id = htons(PFIX_IE_flowStartMilliseconds); pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.len = htons(8); pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.field_id = htons(PFIX_IE_flowEndMilliseconds); pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.len = htons(8); pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.field_id = htons(PFIX_IE_sourceTransportPort); pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.len = htons(2); pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.field_id = htons(PFIX_IE_destinationTransportPort); pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.len = htons(2); pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.field_id = htons(PFIX_IE_ipClassOfService); pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.len = htons(1); pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.field_id = htons(PFIX_IE_protocolIdentifier); pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.len = htons(1); ifp = &pflowif->sc_if; snprintf(ifp->if_xname, sizeof ifp->if_xname, "pflow%d", unit); ifp->if_softc = pflowif; ifp->if_ioctl = pflowioctl; ifp->if_output = pflow_output; ifp->if_start = NULL; ifp->if_xflags = IFXF_CLONED; ifp->if_type = IFT_PFLOW; ifp->if_hdrlen = PFLOW_HDRLEN; ifp->if_flags = IFF_UP; ifp->if_flags &= ~IFF_RUNNING; /* not running, need receiver */ mq_init(&pflowif->sc_outputqueue, 8192, IPL_SOFTNET); pflow_setmtu(pflowif, ETHERMTU); timeout_set_proc(&pflowif->sc_tmo, pflow_timeout, pflowif); timeout_set_proc(&pflowif->sc_tmo6, pflow_timeout6, pflowif); timeout_set_proc(&pflowif->sc_tmo_tmpl, pflow_timeout_tmpl, pflowif); if_counters_alloc(ifp); if_attach(ifp); if_alloc_sadl(ifp); task_set(&pflowif->sc_outputtask, pflow_output_process, pflowif); /* Insert into list of pflows */ KERNEL_ASSERT_LOCKED(); SMR_SLIST_INSERT_HEAD_LOCKED(&pflowif_list, pflowif, sc_next); return (0); } int pflow_clone_destroy(struct ifnet *ifp) { struct pflow_softc *sc = ifp->if_softc; int error; error = 0; rw_enter_write(&sc->sc_lock); sc->sc_dying = 1; rw_exit_write(&sc->sc_lock); KERNEL_ASSERT_LOCKED(); SMR_SLIST_REMOVE_LOCKED(&pflowif_list, sc, pflow_softc, sc_next); smr_barrier(); timeout_del(&sc->sc_tmo); timeout_del(&sc->sc_tmo6); timeout_del(&sc->sc_tmo_tmpl); pflow_flush(sc); task_del(net_tq(ifp->if_index), &sc->sc_outputtask); taskq_barrier(net_tq(ifp->if_index)); mq_purge(&sc->sc_outputqueue); m_freem(sc->send_nam); if (sc->so != NULL) { error = soclose(sc->so, MSG_DONTWAIT); sc->so = NULL; } if (sc->sc_flowdst != NULL) free(sc->sc_flowdst, M_DEVBUF, sc->sc_flowdst->sa_len); if (sc->sc_flowsrc != NULL) free(sc->sc_flowsrc, M_DEVBUF, sc->sc_flowsrc->sa_len); if_detach(ifp); free(sc, M_DEVBUF, sizeof(*sc)); return (error); } int pflowvalidsockaddr(const struct sockaddr *sa, int ignore_port) { struct sockaddr_in6 *sin6; struct sockaddr_in *sin; if (sa == NULL) return (0); switch(sa->sa_family) { case AF_INET: sin = (struct sockaddr_in*) sa; return (sin->sin_addr.s_addr != INADDR_ANY && (ignore_port || sin->sin_port != 0)); case AF_INET6: sin6 = (struct sockaddr_in6*) sa; return (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) && (ignore_port || sin6->sin6_port != 0)); default: return (0); } } int pflow_set(struct pflow_softc *sc, struct pflowreq *pflowr) { struct proc *p = curproc; struct socket *so; struct sockaddr *sa; int error = 0; if (pflowr->addrmask & PFLOW_MASK_VERSION) { switch(pflowr->version) { case PFLOW_PROTO_5: case PFLOW_PROTO_10: break; default: return(EINVAL); } } rw_assert_wrlock(&sc->sc_lock); pflow_flush(sc); if (pflowr->addrmask & PFLOW_MASK_DSTIP) { if (sc->sc_flowdst != NULL && sc->sc_flowdst->sa_family != pflowr->flowdst.ss_family) { free(sc->sc_flowdst, M_DEVBUF, sc->sc_flowdst->sa_len); sc->sc_flowdst = NULL; if (sc->so != NULL) { soclose(sc->so, MSG_DONTWAIT); sc->so = NULL; } } switch (pflowr->flowdst.ss_family) { case AF_INET: if (sc->sc_flowdst == NULL) { if ((sc->sc_flowdst = malloc( sizeof(struct sockaddr_in), M_DEVBUF, M_NOWAIT)) == NULL) return (ENOMEM); } memcpy(sc->sc_flowdst, &pflowr->flowdst, sizeof(struct sockaddr_in)); sc->sc_flowdst->sa_len = sizeof(struct sockaddr_in); break; case AF_INET6: if (sc->sc_flowdst == NULL) { if ((sc->sc_flowdst = malloc( sizeof(struct sockaddr_in6), M_DEVBUF, M_NOWAIT)) == NULL) return (ENOMEM); } memcpy(sc->sc_flowdst, &pflowr->flowdst, sizeof(struct sockaddr_in6)); sc->sc_flowdst->sa_len = sizeof(struct sockaddr_in6); break; default: break; } if (sc->sc_flowdst != NULL) { sc->send_nam->m_len = sc->sc_flowdst->sa_len; sa = mtod(sc->send_nam, struct sockaddr *); memcpy(sa, sc->sc_flowdst, sc->sc_flowdst->sa_len); } } if (pflowr->addrmask & PFLOW_MASK_SRCIP) { if (sc->sc_flowsrc != NULL) free(sc->sc_flowsrc, M_DEVBUF, sc->sc_flowsrc->sa_len); sc->sc_flowsrc = NULL; if (sc->so != NULL) { soclose(sc->so, MSG_DONTWAIT); sc->so = NULL; } switch(pflowr->flowsrc.ss_family) { case AF_INET: if ((sc->sc_flowsrc = malloc( sizeof(struct sockaddr_in), M_DEVBUF, M_NOWAIT)) == NULL) return (ENOMEM); memcpy(sc->sc_flowsrc, &pflowr->flowsrc, sizeof(struct sockaddr_in)); sc->sc_flowsrc->sa_len = sizeof(struct sockaddr_in); break; case AF_INET6: if ((sc->sc_flowsrc = malloc( sizeof(struct sockaddr_in6), M_DEVBUF, M_NOWAIT)) == NULL) return (ENOMEM); memcpy(sc->sc_flowsrc, &pflowr->flowsrc, sizeof(struct sockaddr_in6)); sc->sc_flowsrc->sa_len = sizeof(struct sockaddr_in6); break; default: break; } } if (sc->so == NULL) { if (pflowvalidsockaddr(sc->sc_flowdst, 0)) { error = socreate(sc->sc_flowdst->sa_family, &so, SOCK_DGRAM, 0); if (error) return (error); if (pflowvalidsockaddr(sc->sc_flowsrc, 1)) { struct mbuf *m; MGET(m, M_WAIT, MT_SONAME); m->m_len = sc->sc_flowsrc->sa_len; sa = mtod(m, struct sockaddr *); memcpy(sa, sc->sc_flowsrc, sc->sc_flowsrc->sa_len); solock(so); error = sobind(so, m, p); sounlock(so); m_freem(m); if (error) { soclose(so, MSG_DONTWAIT); return (error); } } sc->so = so; } } else if (!pflowvalidsockaddr(sc->sc_flowdst, 0)) { soclose(sc->so, MSG_DONTWAIT); sc->so = NULL; } NET_LOCK(); mtx_enter(&sc->sc_mtx); /* error check is above */ if (pflowr->addrmask & PFLOW_MASK_VERSION) sc->sc_version = pflowr->version; pflow_setmtu(sc, ETHERMTU); switch (sc->sc_version) { case PFLOW_PROTO_5: timeout_del(&sc->sc_tmo6); timeout_del(&sc->sc_tmo_tmpl); break; case PFLOW_PROTO_10: timeout_add_sec(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT); break; default: /* NOTREACHED */ break; } mtx_leave(&sc->sc_mtx); NET_UNLOCK(); return (0); } int pflowioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct proc *p = curproc; struct pflow_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct pflowreq pflowr; int error = 0; switch (cmd) { case SIOCSIFADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: case SIOCSIFMTU: case SIOCGETPFLOW: case SIOCSETPFLOW: break; default: return (ENOTTY); } /* XXXSMP: enforce lock order */ NET_UNLOCK(); rw_enter_write(&sc->sc_lock); if (sc->sc_dying) { error = ENXIO; goto out; } switch (cmd) { case SIOCSIFADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: NET_LOCK(); if ((ifp->if_flags & IFF_UP) && sc->so != NULL) { ifp->if_flags |= IFF_RUNNING; mtx_enter(&sc->sc_mtx); /* send templates on startup */ if (sc->sc_version == PFLOW_PROTO_10) pflow_sendout_ipfix_tmpl(sc); mtx_leave(&sc->sc_mtx); } else ifp->if_flags &= ~IFF_RUNNING; NET_UNLOCK(); break; case SIOCSIFMTU: if (ifr->ifr_mtu < PFLOW_MINMTU) { error = EINVAL; goto out; } if (ifr->ifr_mtu > MCLBYTES) ifr->ifr_mtu = MCLBYTES; NET_LOCK(); if (ifr->ifr_mtu < ifp->if_mtu) pflow_flush(sc); mtx_enter(&sc->sc_mtx); pflow_setmtu(sc, ifr->ifr_mtu); mtx_leave(&sc->sc_mtx); NET_UNLOCK(); break; case SIOCGETPFLOW: bzero(&pflowr, sizeof(pflowr)); if (sc->sc_flowsrc != NULL) memcpy(&pflowr.flowsrc, sc->sc_flowsrc, sc->sc_flowsrc->sa_len); if (sc->sc_flowdst != NULL) memcpy(&pflowr.flowdst, sc->sc_flowdst, sc->sc_flowdst->sa_len); mtx_enter(&sc->sc_mtx); pflowr.version = sc->sc_version; mtx_leave(&sc->sc_mtx); if ((error = copyout(&pflowr, ifr->ifr_data, sizeof(pflowr)))) goto out; break; case SIOCSETPFLOW: if ((error = suser(p)) != 0) goto out; if ((error = copyin(ifr->ifr_data, &pflowr, sizeof(pflowr)))) goto out; error = pflow_set(sc, &pflowr); if (error != 0) goto out; NET_LOCK(); if ((ifp->if_flags & IFF_UP) && sc->so != NULL) { ifp->if_flags |= IFF_RUNNING; mtx_enter(&sc->sc_mtx); if (sc->sc_version == PFLOW_PROTO_10) pflow_sendout_ipfix_tmpl(sc); mtx_leave(&sc->sc_mtx); } else ifp->if_flags &= ~IFF_RUNNING; NET_UNLOCK(); break; } out: rw_exit_write(&sc->sc_lock); NET_LOCK(); return (error); } int pflow_calc_mtu(struct pflow_softc *sc, int mtu, int hdrsz) { sc->sc_maxcount4 = (mtu - hdrsz - sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow4); sc->sc_maxcount6 = (mtu - hdrsz - sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow6); if (sc->sc_maxcount4 > PFLOW_MAXFLOWS) sc->sc_maxcount4 = PFLOW_MAXFLOWS; if (sc->sc_maxcount6 > PFLOW_MAXFLOWS) sc->sc_maxcount6 = PFLOW_MAXFLOWS; return (hdrsz + sizeof(struct udpiphdr) + MIN(sc->sc_maxcount4 * sizeof(struct pflow_ipfix_flow4), sc->sc_maxcount6 * sizeof(struct pflow_ipfix_flow6))); } void pflow_setmtu(struct pflow_softc *sc, int mtu_req) { int mtu; mtu = mtu_req; switch (sc->sc_version) { case PFLOW_PROTO_5: sc->sc_maxcount = (mtu - sizeof(struct pflow_header) - sizeof(struct udpiphdr)) / sizeof(struct pflow_flow); if (sc->sc_maxcount > PFLOW_MAXFLOWS) sc->sc_maxcount = PFLOW_MAXFLOWS; sc->sc_if.if_mtu = sizeof(struct pflow_header) + sizeof(struct udpiphdr) + sc->sc_maxcount * sizeof(struct pflow_flow); break; case PFLOW_PROTO_10: sc->sc_if.if_mtu = pflow_calc_mtu(sc, mtu, sizeof(struct pflow_v10_header)); break; default: /* NOTREACHED */ break; } } struct mbuf * pflow_get_mbuf(struct pflow_softc *sc, u_int16_t set_id) { struct pflow_set_header set_hdr; struct pflow_header h; struct mbuf *m; MUTEX_ASSERT_LOCKED(&sc->sc_mtx); MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) { pflowstat_inc(pflow_onomem); return (NULL); } MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); pflowstat_inc(pflow_onomem); return (NULL); } m->m_len = m->m_pkthdr.len = 0; m->m_pkthdr.ph_ifidx = 0; if (sc == NULL) /* get only a new empty mbuf */ return (m); switch (sc->sc_version) { case PFLOW_PROTO_5: /* populate pflow_header */ h.reserved1 = 0; h.reserved2 = 0; h.count = 0; h.version = htons(PFLOW_PROTO_5); h.flow_sequence = htonl(sc->sc_gcounter); h.engine_type = PFLOW_ENGINE_TYPE; h.engine_id = PFLOW_ENGINE_ID; m_copyback(m, 0, PFLOW_HDRLEN, &h, M_NOWAIT); sc->sc_count = 0; timeout_add_sec(&sc->sc_tmo, PFLOW_TIMEOUT); break; case PFLOW_PROTO_10: /* populate pflow_set_header */ set_hdr.set_length = 0; set_hdr.set_id = htons(set_id); m_copyback(m, 0, PFLOW_SET_HDRLEN, &set_hdr, M_NOWAIT); break; default: /* NOTREACHED */ break; } return (m); } void copy_flow_data(struct pflow_flow *flow1, struct pflow_flow *flow2, struct pf_state *st, struct pf_state_key *sk, int src, int dst) { flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr; flow1->src_port = flow2->dest_port = sk->port[src]; flow1->dest_ip = flow2->src_ip = sk->addr[dst].v4.s_addr; flow1->dest_port = flow2->src_port = sk->port[dst]; flow1->dest_as = flow2->src_as = flow1->src_as = flow2->dest_as = 0; flow1->if_index_in = htons(st->if_index_in); flow1->if_index_out = htons(st->if_index_out); flow2->if_index_in = htons(st->if_index_out); flow2->if_index_out = htons(st->if_index_in); flow1->dest_mask = flow2->src_mask = flow1->src_mask = flow2->dest_mask = 0; flow1->flow_packets = htonl(st->packets[0]); flow2->flow_packets = htonl(st->packets[1]); flow1->flow_octets = htonl(st->bytes[0]); flow2->flow_octets = htonl(st->bytes[1]); /* * Pretend the flow was created or expired when the machine came up * when creation is in the future of the last time a package was seen * or was created / expired before this machine came up due to pfsync. */ flow1->flow_start = flow2->flow_start = st->creation < 0 || st->creation > st->expire ? htonl(0) : htonl(st->creation * 1000); flow1->flow_finish = flow2->flow_finish = st->expire < 0 ? htonl(0) : htonl(st->expire * 1000); flow1->tcp_flags = flow2->tcp_flags = 0; flow1->protocol = flow2->protocol = sk->proto; flow1->tos = flow2->tos = st->rule.ptr->tos; } void copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *flow1, struct pflow_ipfix_flow4 *flow2, struct pf_state *st, struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst) { flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr; flow1->src_port = flow2->dest_port = sk->port[src]; flow1->dest_ip = flow2->src_ip = sk->addr[dst].v4.s_addr; flow1->dest_port = flow2->src_port = sk->port[dst]; flow1->if_index_in = htonl(st->if_index_in); flow1->if_index_out = htonl(st->if_index_out); flow2->if_index_in = htonl(st->if_index_out); flow2->if_index_out = htonl(st->if_index_in); flow1->flow_packets = htobe64(st->packets[0]); flow2->flow_packets = htobe64(st->packets[1]); flow1->flow_octets = htobe64(st->bytes[0]); flow2->flow_octets = htobe64(st->bytes[1]); /* * Pretend the flow was created when the machine came up when creation * is in the future of the last time a package was seen due to pfsync. */ if (st->creation > st->expire) flow1->flow_start = flow2->flow_start = htobe64((gettime() - getuptime())*1000); else flow1->flow_start = flow2->flow_start = htobe64((gettime() - (getuptime() - st->creation))*1000); flow1->flow_finish = flow2->flow_finish = htobe64((gettime() - (getuptime() - st->expire))*1000); flow1->protocol = flow2->protocol = sk->proto; flow1->tos = flow2->tos = st->rule.ptr->tos; } void copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *flow1, struct pflow_ipfix_flow6 *flow2, struct pf_state *st, struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst) { bcopy(&sk->addr[src].v6, &flow1->src_ip, sizeof(flow1->src_ip)); bcopy(&sk->addr[src].v6, &flow2->dest_ip, sizeof(flow2->dest_ip)); flow1->src_port = flow2->dest_port = sk->port[src]; bcopy(&sk->addr[dst].v6, &flow1->dest_ip, sizeof(flow1->dest_ip)); bcopy(&sk->addr[dst].v6, &flow2->src_ip, sizeof(flow2->src_ip)); flow1->dest_port = flow2->src_port = sk->port[dst]; flow1->if_index_in = htonl(st->if_index_in); flow1->if_index_out = htonl(st->if_index_out); flow2->if_index_in = htonl(st->if_index_out); flow2->if_index_out = htonl(st->if_index_in); flow1->flow_packets = htobe64(st->packets[0]); flow2->flow_packets = htobe64(st->packets[1]); flow1->flow_octets = htobe64(st->bytes[0]); flow2->flow_octets = htobe64(st->bytes[1]); /* * Pretend the flow was created when the machine came up when creation * is in the future of the last time a package was seen due to pfsync. */ if (st->creation > st->expire) flow1->flow_start = flow2->flow_start = htobe64((gettime() - getuptime())*1000); else flow1->flow_start = flow2->flow_start = htobe64((gettime() - (getuptime() - st->creation))*1000); flow1->flow_finish = flow2->flow_finish = htobe64((gettime() - (getuptime() - st->expire))*1000); flow1->protocol = flow2->protocol = sk->proto; flow1->tos = flow2->tos = st->rule.ptr->tos; } int export_pflow(struct pf_state *st) { struct pflow_softc *sc = NULL; struct pf_state_key *sk; sk = st->key[st->direction == PF_IN ? PF_SK_WIRE : PF_SK_STACK]; SMR_SLIST_FOREACH(sc, &pflowif_list, sc_next) { mtx_enter(&sc->sc_mtx); switch (sc->sc_version) { case PFLOW_PROTO_5: if (sk->af == AF_INET) export_pflow_if(st, sk, sc); break; case PFLOW_PROTO_10: if (sk->af == AF_INET || sk->af == AF_INET6) export_pflow_if(st, sk, sc); break; default: /* NOTREACHED */ break; } mtx_leave(&sc->sc_mtx); } return (0); } int export_pflow_if(struct pf_state *st, struct pf_state_key *sk, struct pflow_softc *sc) { struct pf_state pfs_copy; struct ifnet *ifp = &sc->sc_if; u_int64_t bytes[2]; int ret = 0; if (!(ifp->if_flags & IFF_RUNNING)) return (0); if (sc->sc_version == PFLOW_PROTO_10) return (pflow_pack_flow_ipfix(st, sk, sc)); /* PFLOW_PROTO_5 */ if ((st->bytes[0] < (u_int64_t)PFLOW_MAXBYTES) && (st->bytes[1] < (u_int64_t)PFLOW_MAXBYTES)) return (pflow_pack_flow(st, sk, sc)); /* flow > PFLOW_MAXBYTES need special handling */ bcopy(st, &pfs_copy, sizeof(pfs_copy)); bytes[0] = pfs_copy.bytes[0]; bytes[1] = pfs_copy.bytes[1]; while (bytes[0] > PFLOW_MAXBYTES) { pfs_copy.bytes[0] = PFLOW_MAXBYTES; pfs_copy.bytes[1] = 0; if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0) return (ret); if ((bytes[0] - PFLOW_MAXBYTES) > 0) bytes[0] -= PFLOW_MAXBYTES; } while (bytes[1] > (u_int64_t)PFLOW_MAXBYTES) { pfs_copy.bytes[1] = PFLOW_MAXBYTES; pfs_copy.bytes[0] = 0; if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0) return (ret); if ((bytes[1] - PFLOW_MAXBYTES) > 0) bytes[1] -= PFLOW_MAXBYTES; } pfs_copy.bytes[0] = bytes[0]; pfs_copy.bytes[1] = bytes[1]; return (pflow_pack_flow(&pfs_copy, sk, sc)); } int copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc) { int ret = 0; MUTEX_ASSERT_LOCKED(&sc->sc_mtx); if (sc->sc_mbuf == NULL) { if ((sc->sc_mbuf = pflow_get_mbuf(sc, 0)) == NULL) return (ENOBUFS); } m_copyback(sc->sc_mbuf, PFLOW_HDRLEN + (sc->sc_count * sizeof(struct pflow_flow)), sizeof(struct pflow_flow), flow, M_NOWAIT); pflowstat_inc(pflow_flows); sc->sc_gcounter++; sc->sc_count++; if (sc->sc_count >= sc->sc_maxcount) ret = pflow_sendout_v5(sc); return(ret); } int copy_flow_ipfix_4_to_m(struct pflow_ipfix_flow4 *flow, struct pflow_softc *sc) { int ret = 0; MUTEX_ASSERT_LOCKED(&sc->sc_mtx); if (sc->sc_mbuf == NULL) { if ((sc->sc_mbuf = pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV4_ID)) == NULL) { return (ENOBUFS); } sc->sc_count4 = 0; timeout_add_sec(&sc->sc_tmo, PFLOW_TIMEOUT); } m_copyback(sc->sc_mbuf, PFLOW_SET_HDRLEN + (sc->sc_count4 * sizeof(struct pflow_ipfix_flow4)), sizeof(struct pflow_ipfix_flow4), flow, M_NOWAIT); pflowstat_inc(pflow_flows); sc->sc_gcounter++; sc->sc_count4++; if (sc->sc_count4 >= sc->sc_maxcount4) ret = pflow_sendout_ipfix(sc, AF_INET); return(ret); } int copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow, struct pflow_softc *sc) { int ret = 0; MUTEX_ASSERT_LOCKED(&sc->sc_mtx); if (sc->sc_mbuf6 == NULL) { if ((sc->sc_mbuf6 = pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV6_ID)) == NULL) { return (ENOBUFS); } sc->sc_count6 = 0; timeout_add_sec(&sc->sc_tmo6, PFLOW_TIMEOUT); } m_copyback(sc->sc_mbuf6, PFLOW_SET_HDRLEN + (sc->sc_count6 * sizeof(struct pflow_ipfix_flow6)), sizeof(struct pflow_ipfix_flow6), flow, M_NOWAIT); pflowstat_inc(pflow_flows); sc->sc_gcounter++; sc->sc_count6++; if (sc->sc_count6 >= sc->sc_maxcount6) ret = pflow_sendout_ipfix(sc, AF_INET6); return(ret); } int pflow_pack_flow(struct pf_state *st, struct pf_state_key *sk, struct pflow_softc *sc) { struct pflow_flow flow1; struct pflow_flow flow2; int ret = 0; bzero(&flow1, sizeof(flow1)); bzero(&flow2, sizeof(flow2)); if (st->direction == PF_OUT) copy_flow_data(&flow1, &flow2, st, sk, 1, 0); else copy_flow_data(&flow1, &flow2, st, sk, 0, 1); if (st->bytes[0] != 0) /* first flow from state */ ret = copy_flow_to_m(&flow1, sc); if (st->bytes[1] != 0) /* second flow from state */ ret = copy_flow_to_m(&flow2, sc); return (ret); } int pflow_pack_flow_ipfix(struct pf_state *st, struct pf_state_key *sk, struct pflow_softc *sc) { struct pflow_ipfix_flow4 flow4_1, flow4_2; struct pflow_ipfix_flow6 flow6_1, flow6_2; int ret = 0; if (sk->af == AF_INET) { bzero(&flow4_1, sizeof(flow4_1)); bzero(&flow4_2, sizeof(flow4_2)); if (st->direction == PF_OUT) copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc, 1, 0); else copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc, 0, 1); if (st->bytes[0] != 0) /* first flow from state */ ret = copy_flow_ipfix_4_to_m(&flow4_1, sc); if (st->bytes[1] != 0) /* second flow from state */ ret = copy_flow_ipfix_4_to_m(&flow4_2, sc); } else if (sk->af == AF_INET6) { bzero(&flow6_1, sizeof(flow6_1)); bzero(&flow6_2, sizeof(flow6_2)); if (st->direction == PF_OUT) copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc, 1, 0); else copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc, 0, 1); if (st->bytes[0] != 0) /* first flow from state */ ret = copy_flow_ipfix_6_to_m(&flow6_1, sc); if (st->bytes[1] != 0) /* second flow from state */ ret = copy_flow_ipfix_6_to_m(&flow6_2, sc); } return (ret); } void pflow_timeout(void *v) { struct pflow_softc *sc = v; mtx_enter(&sc->sc_mtx); switch (sc->sc_version) { case PFLOW_PROTO_5: pflow_sendout_v5(sc); break; case PFLOW_PROTO_10: pflow_sendout_ipfix(sc, AF_INET); break; default: /* NOTREACHED */ break; } mtx_leave(&sc->sc_mtx); } void pflow_timeout6(void *v) { struct pflow_softc *sc = v; mtx_enter(&sc->sc_mtx); pflow_sendout_ipfix(sc, AF_INET6); mtx_leave(&sc->sc_mtx); } void pflow_timeout_tmpl(void *v) { struct pflow_softc *sc = v; mtx_enter(&sc->sc_mtx); pflow_sendout_ipfix_tmpl(sc); mtx_leave(&sc->sc_mtx); } void pflow_flush(struct pflow_softc *sc) { mtx_enter(&sc->sc_mtx); switch (sc->sc_version) { case PFLOW_PROTO_5: pflow_sendout_v5(sc); break; case PFLOW_PROTO_10: pflow_sendout_ipfix(sc, AF_INET); pflow_sendout_ipfix(sc, AF_INET6); break; default: /* NOTREACHED */ break; } mtx_leave(&sc->sc_mtx); } int pflow_sendout_v5(struct pflow_softc *sc) { struct mbuf *m = sc->sc_mbuf; struct pflow_header *h; struct ifnet *ifp = &sc->sc_if; struct timespec tv; MUTEX_ASSERT_LOCKED(&sc->sc_mtx); timeout_del(&sc->sc_tmo); if (m == NULL) return (0); sc->sc_mbuf = NULL; if (!(ifp->if_flags & IFF_RUNNING)) { m_freem(m); return (0); } pflowstat_inc(pflow_packets); h = mtod(m, struct pflow_header *); h->count = htons(sc->sc_count); /* populate pflow_header */ h->uptime_ms = htonl(getuptime() * 1000); getnanotime(&tv); h->time_sec = htonl(tv.tv_sec); /* XXX 2038 */ h->time_nanosec = htonl(tv.tv_nsec); if (mq_enqueue(&sc->sc_outputqueue, m) == 0) task_add(net_tq(ifp->if_index), &sc->sc_outputtask); return (0); } int pflow_sendout_ipfix(struct pflow_softc *sc, sa_family_t af) { struct mbuf *m; struct pflow_v10_header *h10; struct pflow_set_header *set_hdr; struct ifnet *ifp = &sc->sc_if; u_int32_t count; int set_length; MUTEX_ASSERT_LOCKED(&sc->sc_mtx); switch (af) { case AF_INET: m = sc->sc_mbuf; timeout_del(&sc->sc_tmo); if (m == NULL) return (0); sc->sc_mbuf = NULL; count = sc->sc_count4; set_length = sizeof(struct pflow_set_header) + sc->sc_count4 * sizeof(struct pflow_ipfix_flow4); break; case AF_INET6: m = sc->sc_mbuf6; timeout_del(&sc->sc_tmo6); if (m == NULL) return (0); sc->sc_mbuf6 = NULL; count = sc->sc_count6; set_length = sizeof(struct pflow_set_header) + sc->sc_count6 * sizeof(struct pflow_ipfix_flow6); break; default: unhandled_af(af); } if (!(ifp->if_flags & IFF_RUNNING)) { m_freem(m); return (0); } pflowstat_inc(pflow_packets); set_hdr = mtod(m, struct pflow_set_header *); set_hdr->set_length = htons(set_length); /* populate pflow_header */ M_PREPEND(m, sizeof(struct pflow_v10_header), M_DONTWAIT); if (m == NULL) { pflowstat_inc(pflow_onomem); return (ENOBUFS); } h10 = mtod(m, struct pflow_v10_header *); h10->version = htons(PFLOW_PROTO_10); h10->length = htons(PFLOW_IPFIX_HDRLEN + set_length); h10->time_sec = htonl(gettime()); /* XXX 2038 */ h10->flow_sequence = htonl(sc->sc_sequence); sc->sc_sequence += count; h10->observation_dom = htonl(PFLOW_ENGINE_TYPE); if (mq_enqueue(&sc->sc_outputqueue, m) == 0) task_add(net_tq(ifp->if_index), &sc->sc_outputtask); return (0); } int pflow_sendout_ipfix_tmpl(struct pflow_softc *sc) { struct mbuf *m; struct pflow_v10_header *h10; struct ifnet *ifp = &sc->sc_if; MUTEX_ASSERT_LOCKED(&sc->sc_mtx); timeout_del(&sc->sc_tmo_tmpl); if (!(ifp->if_flags & IFF_RUNNING)) { return (0); } m = pflow_get_mbuf(sc, 0); if (m == NULL) return (0); if (m_copyback(m, 0, sizeof(struct pflow_ipfix_tmpl), &sc->sc_tmpl_ipfix, M_NOWAIT)) { m_freem(m); return (0); } pflowstat_inc(pflow_packets); /* populate pflow_header */ M_PREPEND(m, sizeof(struct pflow_v10_header), M_DONTWAIT); if (m == NULL) { pflowstat_inc(pflow_onomem); return (ENOBUFS); } h10 = mtod(m, struct pflow_v10_header *); h10->version = htons(PFLOW_PROTO_10); h10->length = htons(PFLOW_IPFIX_HDRLEN + sizeof(struct pflow_ipfix_tmpl)); h10->time_sec = htonl(gettime()); /* XXX 2038 */ h10->flow_sequence = htonl(sc->sc_sequence); h10->observation_dom = htonl(PFLOW_ENGINE_TYPE); timeout_add_sec(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT); if (mq_enqueue(&sc->sc_outputqueue, m) == 0) task_add(net_tq(ifp->if_index), &sc->sc_outputtask); return (0); } int pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m) { rw_assert_anylock(&sc->sc_lock); counters_pkt(sc->sc_if.if_counters, ifc_opackets, ifc_obytes, m->m_pkthdr.len); if (sc->so == NULL) { m_freem(m); return (EINVAL); } return (sosend(sc->so, sc->send_nam, NULL, m, NULL, 0)); } int pflow_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { if (namelen != 1) return (ENOTDIR); switch (name[0]) { case NET_PFLOW_STATS: { uint64_t counters[pflow_ncounters]; struct pflowstats pflowstats; if (newp != NULL) return (EPERM); counters_read(pflow_counters, counters, pflow_ncounters, NULL); pflowstats.pflow_flows = counters[pflow_flows]; pflowstats.pflow_packets = counters[pflow_packets]; pflowstats.pflow_onomem = counters[pflow_onomem]; pflowstats.pflow_oerrors = counters[pflow_oerrors]; return (sysctl_struct(oldp, oldlenp, newp, newlen, &pflowstats, sizeof(pflowstats))); } default: return (EOPNOTSUPP); } return (0); }