summaryrefslogtreecommitdiff
path: root/sys/net/if_pflow.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/net/if_pflow.c')
-rw-r--r--sys/net/if_pflow.c622
1 files changed, 622 insertions, 0 deletions
diff --git a/sys/net/if_pflow.c b/sys/net/if_pflow.c
new file mode 100644
index 00000000000..87e0a094ab0
--- /dev/null
+++ b/sys/net/if_pflow.c
@@ -0,0 +1,622 @@
+/* $OpenBSD: if_pflow.c,v 1.1 2008/09/09 13:56:39 henning Exp $ */
+
+/*
+ * Copyright (c) 2008 Henning Brauer <henning@openbsd.org>
+ * Copyright (c) 2008 Joerg Goltermann <jg@osn.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <dev/rndvar.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/bpf.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/tcp.h>
+
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <netinet/in_pcb.h>
+#endif /* INET */
+
+#include <net/pfvar.h>
+#include <net/if_pflow.h>
+
+#include "bpfilter.h"
+#include "pflow.h"
+
+#define PFLOW_MINMTU \
+ (sizeof(struct pflow_header) + sizeof(struct pflow_flow))
+
+#ifdef PFLOWDEBUG
+#define DPRINTF(x) do { printf x ; } while (0)
+#else
+#define DPRINTF(x)
+#endif
+
+struct pflow_softc *pflowif = NULL;
+struct pflowstats pflowstats;
+
+void pflowattach(int);
+int pflow_clone_create(struct if_clone *, int);
+int pflow_clone_destroy(struct ifnet *);
+void pflow_setmtu(struct pflow_softc *, int);
+int pflowoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct rtentry *);
+int pflowioctl(struct ifnet *, u_long, caddr_t);
+void pflowstart(struct ifnet *);
+
+struct mbuf *pflow_get_mbuf(struct pflow_softc *, void **);
+int pflow_sendout(struct pflow_softc *);
+int pflow_sendout_mbuf(struct pflow_softc *, struct mbuf *);
+void pflow_timeout(void *);
+void copy_flow_data(struct pflow_flow *, struct pflow_flow *,
+ struct pf_state *, int, int);
+int pflow_pack_flow(struct pf_state *);
+int pflow_get_dynport(void);
+
+struct if_clone pflow_cloner =
+ IF_CLONE_INITIALIZER("pflow", pflow_clone_create,
+ pflow_clone_destroy);
+
+/* from in_pcb.c */
+extern int ipport_hifirstauto;
+extern int ipport_hilastauto;
+
+void
+pflowattach(int npflow)
+{
+ if_clone_attach(&pflow_cloner);
+}
+
+int
+pflow_clone_create(struct if_clone *ifc, int unit)
+{
+ struct ifnet *ifp;
+
+ if (unit != 0)
+ return (EINVAL);
+
+ if ((pflowif = malloc(sizeof(*pflowif),
+ M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
+ return (ENOMEM);
+
+ pflowif->sc_sender_ip.s_addr = INADDR_ANY;
+ pflowif->sc_sender_port = pflow_get_dynport();
+
+ pflowif->sc_imo.imo_membership = malloc(
+ (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
+ M_WAITOK|M_ZERO);
+ pflowif->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
+ pflowif->sc_receiver_ip.s_addr = 0;
+ pflowif->sc_receiver_port = 0;
+ pflowif->sc_sender_ip.s_addr = INADDR_ANY;
+ pflowif->sc_sender_port = pflow_get_dynport();
+ ifp = &pflowif->sc_if;
+ snprintf(ifp->if_xname, sizeof ifp->if_xname, "pflow%d", unit);
+ ifp->if_softc = pflowif;
+ ifp->if_ioctl = pflowioctl;
+ ifp->if_output = pflowoutput;
+ ifp->if_start = pflowstart;
+ ifp->if_type = IFT_PFLOW;
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+ ifp->if_hdrlen = PFLOW_HDRLEN;
+ ifp->if_flags = IFF_UP;
+ ifp->if_flags &= ~IFF_RUNNING; /* not running, need receiver */
+ pflow_setmtu(pflowif, ETHERMTU);
+ timeout_set(&pflowif->sc_tmo, pflow_timeout, pflowif);
+ if_attach(ifp);
+ if_alloc_sadl(ifp);
+
+ return (0);
+}
+
+int
+pflow_clone_destroy(struct ifnet *ifp)
+{
+ struct pflow_softc *sc = ifp->if_softc;
+
+ timeout_del(&sc->sc_tmo);
+
+#if NBPFILTER > 0
+ bpfdetach(ifp);
+#endif
+ if_detach(ifp);
+ free(pflowif->sc_imo.imo_membership, M_IPMOPTS);
+ free(pflowif, M_DEVBUF);
+ pflowif = NULL;
+ return (0);
+}
+
+/*
+ * Start output on the pflow interface.
+ */
+void
+pflowstart(struct ifnet *ifp)
+{
+ struct mbuf *m;
+ int s;
+
+ for (;;) {
+ s = splnet();
+ IF_DROP(&ifp->if_snd);
+ IF_DEQUEUE(&ifp->if_snd, m);
+ splx(s);
+
+ if (m == NULL)
+ return;
+ m_freem(m);
+ }
+}
+
+int
+pflowoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+ struct rtentry *rt)
+{
+ m_freem(m);
+ return (0);
+}
+
+/* ARGSUSED */
+int
+pflowioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct proc *p = curproc;
+ struct pflow_softc *sc = ifp->if_softc;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct pflowreq pflowr;
+ int s, error;
+
+ switch (cmd) {
+ case SIOCSIFADDR:
+ case SIOCAIFADDR:
+ case SIOCSIFDSTADDR:
+ case SIOCSIFFLAGS:
+ if ((ifp->if_flags & IFF_UP) &&
+ sc->sc_receiver_ip.s_addr != 0 &&
+ sc->sc_receiver_port != 0)
+ ifp->if_flags |= IFF_RUNNING;
+ else
+ ifp->if_flags &= ~IFF_RUNNING;
+ break;
+ case SIOCSIFMTU:
+ if (ifr->ifr_mtu < PFLOW_MINMTU)
+ return (EINVAL);
+ if (ifr->ifr_mtu > MCLBYTES)
+ ifr->ifr_mtu = MCLBYTES;
+ s = splnet();
+ if (ifr->ifr_mtu < ifp->if_mtu)
+ pflow_sendout(sc);
+ pflow_setmtu(sc, ifr->ifr_mtu);
+ splx(s);
+ break;
+
+ case SIOCGETPFLOW:
+ bzero(&pflowr, sizeof(pflowr));
+
+ pflowr.sender_ip = sc->sc_sender_ip;
+ pflowr.receiver_ip = sc->sc_receiver_ip;
+ pflowr.receiver_port = sc->sc_receiver_port;
+
+ if ((error = copyout(&pflowr, ifr->ifr_data,
+ sizeof(pflowr))))
+ return (error);
+ break;
+
+ case SIOCSETPFLOW:
+ if ((error = suser(p, p->p_acflag)) != 0)
+ return (error);
+ if ((error = copyin(ifr->ifr_data, &pflowr,
+ sizeof(pflowr))))
+ return (error);
+
+ if ((ifp->if_flags & IFF_UP) && sc->sc_receiver_ip.s_addr != 0
+ && sc->sc_receiver_port != 0) {
+ s = splnet();
+ pflow_sendout(sc);
+ splx(s);
+ }
+
+ if (pflowr.addrmask & PFLOW_MASK_DSTIP)
+ sc->sc_receiver_ip = pflowr.receiver_ip;
+ if (pflowr.addrmask & PFLOW_MASK_DSTPRT)
+ sc->sc_receiver_port = pflowr.receiver_port;
+ if (pflowr.addrmask & PFLOW_MASK_SRCIP)
+ sc->sc_sender_ip.s_addr = pflowr.sender_ip.s_addr;
+
+ if ((ifp->if_flags & IFF_UP) &&
+ sc->sc_receiver_ip.s_addr != 0 &&
+ sc->sc_receiver_port != 0)
+ ifp->if_flags |= IFF_RUNNING;
+ else
+ ifp->if_flags &= ~IFF_RUNNING;
+
+ break;
+
+ default:
+ return (ENOTTY);
+ }
+ return (0);
+}
+
+void
+pflow_setmtu(struct pflow_softc *sc, int mtu_req)
+{
+ int mtu;
+
+ if (sc->sc_pflow_ifp && sc->sc_pflow_ifp->if_mtu < mtu_req)
+ mtu = sc->sc_pflow_ifp->if_mtu;
+ else
+ mtu = mtu_req;
+
+ sc->sc_maxcount = (mtu - sizeof(struct pflow_header)) /
+ sizeof(struct pflow_flow);
+ if (sc->sc_maxcount > PFLOW_MAXFLOWS)
+ sc->sc_maxcount = PFLOW_MAXFLOWS;
+ sc->sc_if.if_mtu = sizeof(struct pflow_header) +
+ sc->sc_maxcount * sizeof(struct pflow_flow);
+}
+
+struct mbuf *
+pflow_get_mbuf(struct pflow_softc *sc, void **sp)
+{
+ struct pflow_header *h;
+ struct mbuf *m, *top = NULL, **mp = &top;
+ int len, totlen;
+
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (m == NULL) {
+ sc->sc_if.if_oerrors++;
+ return (NULL);
+ }
+
+ len = MHLEN;
+ totlen = (sc->sc_maxcount * sizeof(struct pflow_flow)) +
+ sizeof(struct pflow_header);
+
+ while (totlen > 0) {
+ if (top) {
+ MGET(m, M_DONTWAIT, MT_DATA);
+ if (m == 0) {
+ m_freem(top);
+ sc->sc_if.if_oerrors++;
+ return (NULL);
+ }
+ len = MLEN;
+ }
+ if (totlen >= MINCLSIZE) {
+ MCLGET(m, M_DONTWAIT);
+ if (m->m_flags & M_EXT)
+ len = MCLBYTES;
+ else {
+ m_free(m);
+ m_freem(top);
+ sc->sc_if.if_oerrors++;
+ return (NULL);
+ }
+ }
+ m->m_len = len = min(totlen, len);
+ totlen -= len;
+ *mp = m;
+ mp = &m->m_next;
+ }
+
+ top->m_pkthdr.rcvif = NULL;
+ top->m_len = m->m_pkthdr.len = sizeof(struct pflow_header);
+
+ /* populate pflow_header */
+ h = mtod(top, struct pflow_header *);
+ h->reserved1 = 0;
+ h->reserved2 = 0;
+ h->count = 0;
+ h->version = htons(PFLOW_VERSION);
+ h->flow_sequence = htonl(sc->sc_gcounter);
+ h->engine_type = PFLOW_ENGINE_TYPE;
+ h->engine_id = PFLOW_ENGINE_ID;
+
+ sc->sc_count = 0;
+ *sp = (void *)((char *)h + PFLOW_HDRLEN);
+ timeout_add_sec(&sc->sc_tmo, PFLOW_TIMEOUT);
+ return (top);
+}
+
+void
+copy_flow_data(struct pflow_flow *flow1, struct pflow_flow *flow2,
+ struct pf_state *st, int src, int dst)
+{
+ struct pf_state_key *sk = st->key[PF_SK_WIRE];
+
+ flow1->src_ip = flow2->dest_ip =
+ st->key[PF_SK_WIRE]->addr[src].v4.s_addr;
+ flow1->src_port = flow2->dest_port = st->key[PF_SK_WIRE]->port[src];
+ flow1->dest_ip = flow2->src_ip =
+ st->key[PF_SK_WIRE]->addr[dst].v4.s_addr;
+ flow1->dest_port = flow2->src_port = st->key[PF_SK_WIRE]->port[dst];
+
+ flow1->dest_as = flow2->src_as =
+ flow1->src_as = flow2->dest_as = 0;
+ flow1->if_index_out = flow2->if_index_in =
+ flow1->if_index_in = flow2->if_index_out = 0;
+ flow1->dest_mask = flow2->src_mask =
+ flow1->src_mask = flow2->dest_mask = 0;
+
+ flow1->flow_packets = htonl(st->packets[0]);
+ flow2->flow_packets = htonl(st->packets[1]);
+ flow1->flow_octets = htonl(st->bytes[0]);
+ flow2->flow_octets = htonl(st->bytes[1]);
+
+ flow1->flow_start = flow2->flow_start = htonl(st->creation * 1000);
+ flow1->flow_finish = flow2->flow_finish = htonl(time_second * 1000);
+ flow1->tcp_flags = flow2->tcp_flags = 0;
+ flow1->protocol = flow2->protocol = sk->proto;
+ flow1->tos = flow2->tos = st->rule.ptr->tos;
+}
+
+int
+export_pflow(struct pf_state *st)
+{
+ struct pf_state pfs_copy;
+ struct pflow_softc *sc = pflowif;
+ struct ifnet *ifp = NULL;
+ u_int64_t bytes[2];
+ int ret = 0;
+
+ if (sc == NULL)
+ return (0);
+
+ ifp = &sc->sc_if;
+ if (!(ifp->if_flags & IFF_UP))
+ return (0);
+
+ if ((st->bytes[0] < (u_int64_t)PFLOW_MAXBYTES)
+ && (st->bytes[1] < (u_int64_t)PFLOW_MAXBYTES))
+ return pflow_pack_flow(st);
+
+ /* flow > PFLOW_MAXBYTES need special handling */
+ bcopy(st, &pfs_copy, sizeof(pfs_copy));
+ bytes[0] = pfs_copy.bytes[0];
+ bytes[1] = pfs_copy.bytes[1];
+
+ while (bytes[0] > PFLOW_MAXBYTES) {
+ pfs_copy.bytes[0] = PFLOW_MAXBYTES;
+ pfs_copy.bytes[1] = 0;
+
+ if ((ret = pflow_pack_flow(&pfs_copy)) != 0)
+ return (ret);
+ if ((bytes[0] - PFLOW_MAXBYTES) > 0)
+ bytes[0] -= PFLOW_MAXBYTES;
+ }
+
+ while (bytes[1] > (u_int64_t)PFLOW_MAXBYTES) {
+ pfs_copy.bytes[1] = PFLOW_MAXBYTES;
+ pfs_copy.bytes[0] = 0;
+
+ if ((ret = pflow_pack_flow(&pfs_copy)) != 0)
+ return (ret);
+ if ((bytes[1] - PFLOW_MAXBYTES) > 0)
+ bytes[1] -= PFLOW_MAXBYTES;
+ }
+
+ pfs_copy.bytes[0] = bytes[0];
+ pfs_copy.bytes[1] = bytes[1];
+
+ return (pflow_pack_flow(&pfs_copy));
+}
+
+int
+pflow_pack_flow(struct pf_state *st)
+{
+ struct pflow_softc *sc = pflowif;
+ struct pflow_flow *flow1 = NULL;
+ struct pflow_flow flow2;
+ struct pf_state_key *sk = st->key[PF_SK_WIRE];
+ int s, ret = 0;
+
+ if (sk->af != AF_INET)
+ return (0);
+
+ s = splnet();
+
+ if (sc->sc_mbuf == NULL) {
+ if ((sc->sc_mbuf = pflow_get_mbuf(sc,
+ (void **)&sc->sc_flowp.s)) == NULL) {
+ splx(s);
+ return (ENOMEM);
+ }
+ }
+
+ pflowstats.pflow_flows++;
+ sc->sc_gcounter++;
+ sc->sc_count++;
+
+ flow1 = sc->sc_flowp.s++;
+ sc->sc_mbuf->m_pkthdr.len =
+ sc->sc_mbuf->m_len += sizeof(struct pflow_flow);
+ bzero(flow1, sizeof(*flow1));
+ bzero(&flow2, sizeof(flow2));
+
+ if (st->direction == PF_OUT)
+ copy_flow_data(flow1, &flow2, st, 1, 0);
+ else
+ copy_flow_data(flow1, &flow2, st, 0, 1);
+
+ if (st->bytes[0] != 0) { /* first flow from state */
+ if (sc->sc_count >= sc->sc_maxcount)
+ ret = pflow_sendout(sc);
+
+ if (st->bytes[1] != 0) {
+ /* one more flow, second part from state */
+ if (sc->sc_mbuf == NULL) {
+ if ((sc->sc_mbuf = pflow_get_mbuf(sc,
+ (void **)&sc->sc_flowp.s)) == NULL) {
+ splx(s);
+ return (ENOMEM);
+ }
+ }
+
+ pflowstats.pflow_flows++;
+ sc->sc_gcounter++;
+ sc->sc_count++;
+
+ flow1 = sc->sc_flowp.s++;
+ sc->sc_mbuf->m_pkthdr.len =
+ sc->sc_mbuf->m_len += sizeof(struct pflow_flow);
+ bzero(flow1, sizeof(*flow1));
+ }
+ }
+
+ if (st->bytes[1] != 0) { /* second flow from state */
+ bcopy(&flow2, flow1, sizeof(*flow1));
+ if (sc->sc_count >= sc->sc_maxcount)
+ ret = pflow_sendout(sc);
+ }
+
+ splx(s);
+ return (ret);
+}
+
+void
+pflow_timeout(void *v)
+{
+ struct pflow_softc *sc = v;
+ int s;
+
+ s = splnet();
+ pflow_sendout(sc);
+ splx(s);
+}
+
+/* This must be called in splnet() */
+int
+pflow_sendout(struct pflow_softc *sc)
+{
+ struct mbuf *m;
+ struct pflow_header *h;
+#if NBPFILTER > 0
+ struct ifnet *ifp = &sc->sc_if;
+#endif
+
+ timeout_del(&sc->sc_tmo);
+
+ if (sc->sc_mbuf == NULL)
+ return (0);
+
+ pflowstats.pflow_packets++;
+
+ if (!(ifp->if_flags & IFF_RUNNING)) {
+ m_freem(m);
+ return (0);
+ }
+
+ m = sc->sc_mbuf;
+ sc->sc_mbuf = NULL;
+ sc->sc_flowp.s = NULL;
+ h = mtod(m, struct pflow_header *);
+ h->count = htons(sc->sc_count);
+
+ /* populate pflow_header */
+ h->uptime_ms = htonl(time_uptime * 1000);
+ h->time_sec = htonl(time_second);
+ h->time_nanosec = htonl(ticks);
+
+#if NBPFILTER > 0
+ if (ifp->if_bpf)
+ bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+#endif
+
+ return (pflow_sendout_mbuf(sc, m));
+}
+
+int
+pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m)
+{
+ struct udpiphdr *ui;
+ int len = m->m_pkthdr.len;
+
+ /* UDP Header*/
+ M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT);
+ if (m == NULL) {
+ pflowstats.pflow_onomem++;
+ return (0);
+ }
+
+ ui = mtod(m, struct udpiphdr *);
+ ui->ui_pr = IPPROTO_UDP;
+ ui->ui_len = htons((u_int16_t) len + sizeof (struct udphdr));
+ ui->ui_src = sc->sc_sender_ip;
+ ui->ui_sport = sc->sc_sender_port;
+ ui->ui_dst = sc->sc_receiver_ip;
+ ui->ui_dport = sc->sc_receiver_port;
+ ui->ui_ulen = ui->ui_len;
+
+ ((struct ip *)ui)->ip_v = IPVERSION;
+ ((struct ip *)ui)->ip_hl = sizeof(struct ip) >> 2;
+ ((struct ip *)ui)->ip_id = htons(ip_randomid());
+ ((struct ip *)ui)->ip_off = htons(IP_DF);
+ ((struct ip *)ui)->ip_tos = IPTOS_LOWDELAY;
+ ((struct ip *)ui)->ip_ttl = IPDEFTTL;
+ ((struct ip *)ui)->ip_len = htons(sizeof (struct udpiphdr) + len);
+
+ /*
+ * Compute the pseudo-header checksum; defer further checksumming
+ * until ip_output() or hardware (if it exists).
+ */
+ m->m_pkthdr.csum_flags |= M_UDPV4_CSUM_OUT;
+ ui->ui_sum = in_cksum_phdr(ui->ui_src.s_addr,
+ ui->ui_dst.s_addr, htons((u_int16_t)len +
+ sizeof(struct udphdr) + IPPROTO_UDP));
+
+ if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
+ pflowstats.pflow_oerrors++;
+ return (0);
+}
+
+int
+pflow_get_dynport(void)
+{
+ u_int16_t tmp, low, high, cut;
+
+ low = ipport_hifirstauto; /* sysctl */
+ high = ipport_hilastauto;
+
+ cut = arc4random_uniform(1 + high - low) + low;
+
+ for (tmp = cut; tmp <= high; ++(tmp)) {
+ if (!in_baddynamic(tmp, IPPROTO_UDP))
+ return (htons(tmp));
+ }
+
+ for (tmp = cut - 1; tmp >= low; --(tmp)) {
+ if (!in_baddynamic(tmp, IPPROTO_UDP))
+ return (htons(tmp));
+ }
+
+ return (htons(ipport_hilastauto)); /* XXX */
+}