summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Belopuhov <mikeb@cvs.openbsd.org>2017-05-04 17:57:57 +0000
committerMike Belopuhov <mikeb@cvs.openbsd.org>2017-05-04 17:57:57 +0000
commitcc09ef627d2e8a7015f53b5c91a6ef7cf0f5cfe2 (patch)
tree4f53fccad6f35a409e75b0774affce1c37d30a24
parent8bef1bbb61ad14d5d32efdc5d06c918b519b3e9d (diff)
Implementation of the Flow Queue - Controlled Delay (FQ-CoDel)
The purpose of FQ-CoDel is to provide fair sharing of bandwidth between simultaneous connections and reduce latency differences among them. OK mpi, sthen, visa
-rw-r--r--sys/net/fq_codel.c672
-rw-r--r--sys/net/fq_codel.h51
2 files changed, 723 insertions, 0 deletions
diff --git a/sys/net/fq_codel.c b/sys/net/fq_codel.c
new file mode 100644
index 00000000000..0a27ce99dc3
--- /dev/null
+++ b/sys/net/fq_codel.c
@@ -0,0 +1,672 @@
+/*
+ * Copyright (c) 2017 Mike Belopuhov
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Codel - The Controlled-Delay Active Queue Management algorithm
+ * IETF draft-ietf-aqm-codel-07
+ *
+ * Based on the algorithm by Kathleen Nichols and Van Jacobson with
+ * improvements from Dave Taht and Eric Dumazet.
+ */
+
+/*
+ * The FlowQueue-CoDel Packet Scheduler and Active Queue Management
+ * IETF draft-ietf-aqm-fq-codel-06
+ *
+ * Based on the implementation by Rasool Al-Saadi, Centre for Advanced
+ * Internet Architectures, Swinburne University of Technology, Melbourne,
+ * Australia.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/mbuf.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/pfvar.h>
+#include <net/fq_codel.h>
+
+/* #define FQCODEL_DEBUG 1 */
+
+#ifdef FQCODEL_DEBUG
+#define DPRINTF(x...) printf(x)
+#else
+#define DPRINTF(x...)
+#endif
+
+struct codel {
+ struct mbuf_list q;
+
+ unsigned int dropping:1; /* Dropping state */
+ unsigned int backlog:31; /* Number of bytes in the queue */
+
+ unsigned short drops; /* Free running counter of drops */
+ unsigned short ldrops; /* Value from the previous run */
+
+ int64_t start; /* The moment queue was above target */
+ int64_t next; /* Next interval */
+};
+
+struct codel_params {
+ int64_t target;
+ int64_t interval;
+ int quantum;
+
+ uint32_t *intervals;
+};
+
+void codel_initparams(struct codel_params *, unsigned int,
+ unsigned int, int);
+void codel_freeparams(struct codel_params *);
+void codel_gettime(int64_t *);
+unsigned int codel_backlog(struct codel *);
+unsigned int codel_qlength(struct codel *);
+void codel_enqueue(struct codel *, int64_t, struct mbuf *);
+struct mbuf *codel_dequeue(struct codel *, struct codel_params *, int64_t,
+ struct mbuf_list *, uint64_t *, uint64_t *);
+struct mbuf *codel_commit(struct codel *, struct mbuf *);
+void codel_purge(struct codel *, struct mbuf_list *ml);
+
+struct flow {
+ struct codel cd;
+ int active:1;
+ int deficit:31;
+#ifdef FQCODEL_DEBUG
+ uint16_t id;
+#endif
+ SIMPLEQ_ENTRY(flow) flowentry;
+};
+SIMPLEQ_HEAD(flowq, flow);
+
+struct fqcodel {
+ struct flowq newq;
+ struct flowq oldq;
+
+ struct flow *flows;
+
+ struct codel_params cparams;
+
+ unsigned int nflows;
+ unsigned int qlimit;
+ int quantum;
+
+ unsigned int flags;
+#define FQCF_FIXED_QUANTUM 0x1
+
+ /* stats */
+ struct fqcodel_pktcntr xmit_cnt;
+ struct fqcodel_pktcntr drop_cnt;
+};
+
+unsigned int fqcodel_idx(unsigned int, const struct mbuf *);
+void *fqcodel_alloc(unsigned int, void *);
+void fqcodel_free(unsigned int, void *);
+struct mbuf *fqcodel_enq(struct ifqueue *, struct mbuf *);
+struct mbuf *fqcodel_deq_begin(struct ifqueue *, void **);
+void fqcodel_deq_commit(struct ifqueue *, struct mbuf *, void *);
+void fqcodel_purge(struct ifqueue *, struct mbuf_list *);
+
+/*
+ * ifqueue glue.
+ */
+
+static const struct ifq_ops fqcodel_ops = {
+ fqcodel_idx,
+ fqcodel_enq,
+ fqcodel_deq_begin,
+ fqcodel_deq_commit,
+ fqcodel_purge,
+ fqcodel_alloc,
+ fqcodel_free,
+};
+
+const struct ifq_ops * const ifq_fqcodel_ops = &fqcodel_ops;
+
+/* Default aggregate queue depth */
+static const unsigned int fqcodel_qlimit = 1024;
+
+/* Packet drop threshold */
+static const unsigned int fqcodel_threshold = 64;
+
+/*
+ * CoDel implementation
+ */
+
+/* Delay target, 5ms */
+static const int64_t codel_target = 5000000;
+
+/* Grace period after last drop, 16 100ms intervals */
+static const int64_t codel_grace = 1600000000;
+
+/* First 399 "100 / sqrt(x)" intervarls, ns precision */
+static const uint32_t codel_intervals[] = {
+ 100000000, 70710678, 57735027, 50000000, 44721360, 40824829, 37796447,
+ 35355339, 33333333, 31622777, 30151134, 28867513, 27735010, 26726124,
+ 25819889, 25000000, 24253563, 23570226, 22941573, 22360680, 21821789,
+ 21320072, 20851441, 20412415, 20000000, 19611614, 19245009, 18898224,
+ 18569534, 18257419, 17960530, 17677670, 17407766, 17149859, 16903085,
+ 16666667, 16439899, 16222142, 16012815, 15811388, 15617376, 15430335,
+ 15249857, 15075567, 14907120, 14744196, 14586499, 14433757, 14285714,
+ 14142136, 14002801, 13867505, 13736056, 13608276, 13483997, 13363062,
+ 13245324, 13130643, 13018891, 12909944, 12803688, 12700013, 12598816,
+ 12500000, 12403473, 12309149, 12216944, 12126781, 12038585, 11952286,
+ 11867817, 11785113, 11704115, 11624764, 11547005, 11470787, 11396058,
+ 11322770, 11250879, 11180340, 11111111, 11043153, 10976426, 10910895,
+ 10846523, 10783277, 10721125, 10660036, 10599979, 10540926, 10482848,
+ 10425721, 10369517, 10314212, 10259784, 10206207, 10153462, 10101525,
+ 10050378, 10000000, 9950372, 9901475, 9853293, 9805807, 9759001,
+ 9712859, 9667365, 9622504, 9578263, 9534626, 9491580, 9449112,
+ 9407209, 9365858, 9325048, 9284767, 9245003, 9205746, 9166985,
+ 9128709, 9090909, 9053575, 9016696, 8980265, 8944272, 8908708,
+ 8873565, 8838835, 8804509, 8770580, 8737041, 8703883, 8671100,
+ 8638684, 8606630, 8574929, 8543577, 8512565, 8481889, 8451543,
+ 8421519, 8391814, 8362420, 8333333, 8304548, 8276059, 8247861,
+ 8219949, 8192319, 8164966, 8137885, 8111071, 8084521, 8058230,
+ 8032193, 8006408, 7980869, 7955573, 7930516, 7905694, 7881104,
+ 7856742, 7832604, 7808688, 7784989, 7761505, 7738232, 7715167,
+ 7692308, 7669650, 7647191, 7624929, 7602859, 7580980, 7559289,
+ 7537784, 7516460, 7495317, 7474351, 7453560, 7432941, 7412493,
+ 7392213, 7372098, 7352146, 7332356, 7312724, 7293250, 7273930,
+ 7254763, 7235746, 7216878, 7198158, 7179582, 7161149, 7142857,
+ 7124705, 7106691, 7088812, 7071068, 7053456, 7035975, 7018624,
+ 7001400, 6984303, 6967330, 6950480, 6933752, 6917145, 6900656,
+ 6884284, 6868028, 6851887, 6835859, 6819943, 6804138, 6788442,
+ 6772855, 6757374, 6741999, 6726728, 6711561, 6696495, 6681531,
+ 6666667, 6651901, 6637233, 6622662, 6608186, 6593805, 6579517,
+ 6565322, 6551218, 6537205, 6523281, 6509446, 6495698, 6482037,
+ 6468462, 6454972, 6441566, 6428243, 6415003, 6401844, 6388766,
+ 6375767, 6362848, 6350006, 6337243, 6324555, 6311944, 6299408,
+ 6286946, 6274558, 6262243, 6250000, 6237829, 6225728, 6213698,
+ 6201737, 6189845, 6178021, 6166264, 6154575, 6142951, 6131393,
+ 6119901, 6108472, 6097108, 6085806, 6074567, 6063391, 6052275,
+ 6041221, 6030227, 6019293, 6008418, 5997601, 5986843, 5976143,
+ 5965500, 5954913, 5944383, 5933908, 5923489, 5913124, 5902813,
+ 5892557, 5882353, 5872202, 5862104, 5852057, 5842062, 5832118,
+ 5822225, 5812382, 5802589, 5792844, 5783149, 5773503, 5763904,
+ 5754353, 5744850, 5735393, 5725983, 5716620, 5707301, 5698029,
+ 5688801, 5679618, 5670480, 5661385, 5652334, 5643326, 5634362,
+ 5625440, 5616560, 5607722, 5598925, 5590170, 5581456, 5572782,
+ 5564149, 5555556, 5547002, 5538488, 5530013, 5521576, 5513178,
+ 5504819, 5496497, 5488213, 5479966, 5471757, 5463584, 5455447,
+ 5447347, 5439283, 5431254, 5423261, 5415304, 5407381, 5399492,
+ 5391639, 5383819, 5376033, 5368281, 5360563, 5352877, 5345225,
+ 5337605, 5330018, 5322463, 5314940, 5307449, 5299989, 5292561,
+ 5285164, 5277798, 5270463, 5263158, 5255883, 5248639, 5241424,
+ 5234239, 5227084, 5219958, 5212860, 5205792, 5198752, 5191741,
+ 5184758, 5177804, 5170877, 5163978, 5157106, 5150262, 5143445,
+ 5136655, 5129892, 5123155, 5116445, 5109761, 5103104, 5096472,
+ 5089866, 5083286, 5076731, 5070201, 5063697, 5057217, 5050763,
+ 5044333, 5037927, 5031546, 5025189, 5018856, 5012547, 5006262
+};
+
+void
+codel_initparams(struct codel_params *cp, unsigned int target,
+ unsigned int interval, int quantum)
+{
+ uint64_t mult;
+ unsigned int i;
+
+ /*
+ * Update observation intervals table according to the configured
+ * initial interval value.
+ */
+ if (interval > codel_intervals[0]) {
+ /* Select either specified target or 5% of an interval */
+ cp->target = MAX(target, interval / 5);
+ cp->interval = interval;
+
+ /* The coefficient is scaled up by a 1000 */
+ mult = ((uint64_t)cp->interval * 1000) / codel_intervals[0];
+
+ /* Prepare table of intervals */
+ cp->intervals = mallocarray(nitems(codel_intervals),
+ sizeof(codel_intervals[0]), M_DEVBUF, M_WAITOK | M_ZERO);
+ for (i = 0; i < nitems(codel_intervals); i++)
+ cp->intervals[i] = ((uint64_t)codel_intervals[i] *
+ mult) / 1000;
+ } else {
+ cp->target = MAX(target, codel_target);
+ cp->interval = codel_intervals[0];
+ cp->intervals = (uint32_t *)codel_intervals;
+ }
+
+ cp->quantum = quantum;
+}
+
+void
+codel_freeparams(struct codel_params *cp)
+{
+ if (cp->intervals != codel_intervals)
+ free(cp->intervals, M_DEVBUF, nitems(codel_intervals) *
+ sizeof(codel_intervals[0]));
+ cp->intervals = NULL;
+}
+
+void
+codel_gettime(int64_t *now)
+{
+ struct timespec tv;
+
+ nanouptime(&tv);
+ *now = tv.tv_sec * 1000000000LL + tv.tv_nsec;
+}
+
+unsigned int
+codel_backlog(struct codel *cd)
+{
+ return (cd->backlog);
+}
+
+unsigned int
+codel_qlength(struct codel *cd)
+{
+ return (ml_len(&cd->q));
+}
+
+void
+codel_enqueue(struct codel *cd, int64_t now, struct mbuf *m)
+{
+ m->m_pkthdr.ph_timestamp = now;
+
+ ml_enqueue(&cd->q, m);
+ cd->backlog += m->m_pkthdr.len;
+}
+
+/*
+ * Select the next interval according to the number of drops
+ * in the current one relative to the provided timestamp.
+ */
+static inline void
+control_law(struct codel *cd, struct codel_params *cp, int64_t rts)
+{
+ unsigned int idx;
+
+ idx = min(cd->drops, nitems(codel_intervals) - 1);
+ cd->next = rts + cp->intervals[idx];
+}
+
+/*
+ * Pick the next enqueued packet and determine the queueing delay
+ * as well as whether or not it's a good candidate for dropping
+ * from the queue.
+ *
+ * The decision whether to drop the packet or not is made based
+ * on the queueing delay target of 5ms and on the current queue
+ * lenght in bytes which shouldn't be less than the amount of data
+ * that arrives in a typical interarrival time (MTU-sized packets
+ * arriving spaced by the amount of time it takes to send such a
+ * packet on the bottleneck).
+ */
+static inline struct mbuf *
+codel_next_packet(struct codel *cd, struct codel_params *cp, int64_t now,
+ int *drop)
+{
+ struct mbuf *m;
+
+ *drop = 0;
+
+ m = MBUF_LIST_FIRST(&cd->q);
+ if (m == NULL) {
+ KASSERT(cd->backlog == 0);
+ /* Empty queue, reset interval */
+ cd->start = 0;
+ return (NULL);
+ }
+
+ if (now - m->m_pkthdr.ph_timestamp < cp->target ||
+ cd->backlog <= cp->quantum) {
+ /*
+ * The minimum delay decreased below the target, reset
+ * the current observation interval.
+ */
+ cd->start = 0;
+ return (m);
+ }
+
+ if (cd->start == 0) {
+ /*
+ * This is the first packet to be delayed for more than
+ * the target, start the first observation interval after
+ * a single RTT and see if the minimum delay goes below
+ * the target within the interval, otherwise punish the
+ * next packet.
+ */
+ cd->start = now + cp->interval;
+ } else if (now > cd->start) {
+ *drop = 1;
+ }
+ return (m);
+}
+
+enum { ACCEPTING, FIRSTDROP, DROPPING, CONTROL, RECOVERY };
+
+static inline int
+codel_state_change(struct codel *cd, int64_t now, struct mbuf *m, int drop,
+ int state)
+{
+ if (state == FIRSTDROP)
+ return (ACCEPTING);
+
+ if (cd->dropping) {
+ if (!drop)
+ return (RECOVERY);
+ else if (now >= cd->next)
+ return (state == CONTROL ? DROPPING : CONTROL);
+ } else if (drop)
+ return (FIRSTDROP);
+
+ if (m == NULL)
+ return (RECOVERY);
+
+ return (ACCEPTING);
+}
+
+struct mbuf *
+codel_dequeue(struct codel *cd, struct codel_params *cp, int64_t now,
+ struct mbuf_list *free_ml, uint64_t *dpkts, uint64_t *dbytes)
+{
+ struct mbuf *m;
+ unsigned short delta;
+ int drop, state, done = 0;
+
+ *dpkts = *dbytes = 0;
+
+ state = cd->dropping ? DROPPING : ACCEPTING;
+
+ while (!done) {
+ m = codel_next_packet(cd, cp, now, &drop);
+ state = codel_state_change(cd, now, m, drop, state);
+
+ switch (state) {
+ case FIRSTDROP:
+ m = codel_commit(cd, m);
+ ml_enqueue(free_ml, m);
+
+ *dpkts += 1;
+ *dbytes += m->m_pkthdr.len;
+
+ cd->dropping = 1;
+
+ /*
+ * If we're still within the grace period and not
+ * meeting our minimal delay target we treat this
+ * as a continuation of the previous observation
+ * interval and shrink it further. Otherwise we
+ * start from the initial one.
+ */
+ delta = cd->drops - cd->ldrops;
+ if (delta > 1) {
+ if (now < cd->next ||
+ now - cd->next < codel_grace)
+ cd->drops = delta;
+ else
+ cd->drops = 1;
+ } else
+ cd->drops = 1;
+ control_law(cd, cp, now);
+ cd->ldrops = cd->drops;
+
+ /* fetches the next packet and goes to ACCEPTING */
+ break;
+ case DROPPING:
+ m = codel_commit(cd, m);
+ ml_enqueue(free_ml, m);
+ cd->drops++;
+
+ *dpkts += 1;
+ *dbytes += m->m_pkthdr.len;
+
+ /* fetches the next packet and goes to CONTROL */
+ break;
+ case CONTROL:
+ if (drop) {
+ control_law(cd, cp, cd->next);
+ continue;
+ }
+ /* FALLTHROUGH */
+ case RECOVERY:
+ cd->dropping = 0;
+ /* FALLTHROUGH */
+ case ACCEPTING:
+ done = 1;
+ break;
+ }
+ }
+
+ return (m);
+}
+
+struct mbuf *
+codel_commit(struct codel *cd, struct mbuf *m)
+{
+ struct mbuf *n;
+
+ n = ml_dequeue(&cd->q);
+ if (m)
+ KASSERT(n == m);
+ KASSERT(n != NULL);
+ KASSERT(cd->backlog >= n->m_pkthdr.len);
+ cd->backlog -= n->m_pkthdr.len;
+ return (n);
+}
+
+void
+codel_purge(struct codel *cd, struct mbuf_list *ml)
+{
+ ml_enlist(ml, &cd->q);
+ cd->backlog = 0;
+}
+
+/*
+ * FQ-CoDel implementation
+ */
+
+static inline struct flow *
+classify_flow(struct fqcodel *fqc, struct mbuf *m)
+{
+ unsigned int index;
+
+ if (m->m_pkthdr.ph_flowid & M_FLOWID_VALID)
+ index = (m->m_pkthdr.ph_flowid & M_FLOWID_MASK) % fqc->nflows;
+ else
+ index = arc4random_uniform(fqc->nflows);
+
+ DPRINTF("%s: %u\n", __func__, index);
+
+ return (&fqc->flows[index]);
+}
+
+struct mbuf *
+fqcodel_enq(struct ifqueue *ifq, struct mbuf *m)
+{
+ struct fqcodel *fqc = ifq->ifq_q;
+ struct flow *flow;
+ unsigned int backlog = 0;
+ int64_t now;
+ int i;
+
+ flow = classify_flow(fqc, m);
+ if (flow == NULL)
+ return (m);
+
+ codel_gettime(&now);
+ codel_enqueue(&flow->cd, now, m);
+
+ if (!flow->active) {
+ SIMPLEQ_INSERT_TAIL(&fqc->newq, flow, flowentry);
+ flow->deficit = fqc->quantum;
+ flow->active = 1;
+ DPRINTF("%s: flow %u active deficit %d\n", __func__,
+ flow->id, flow->deficit);
+ }
+
+ /*
+ * Check the limit for all queues and remove a packet
+ * from the longest one.
+ */
+ if (ifq_len(ifq) >= fqcodel_qlimit) {
+ for (i = 0; i < fqc->nflows; i++) {
+ if (codel_backlog(&fqc->flows[i].cd) > backlog) {
+ flow = &fqc->flows[i];
+ backlog = codel_backlog(&flow->cd);
+ }
+ }
+ KASSERT(flow != NULL);
+ m = codel_commit(&flow->cd, NULL);
+ DPRINTF("%s: dropping from flow %u\n", __func__,
+ flow->id);
+ return (m);
+ }
+
+ return (NULL);
+}
+
+static inline struct flowq *
+select_queue(struct fqcodel *fqc)
+{
+ struct flowq *fq = NULL;
+
+ if (!SIMPLEQ_EMPTY(&fqc->newq))
+ fq = &fqc->newq;
+ else if (!SIMPLEQ_EMPTY(&fqc->oldq))
+ fq = &fqc->oldq;
+ return (fq);
+}
+
+static inline struct flow *
+first_flow(struct fqcodel *fqc, struct flowq **fq)
+{
+ struct flow *flow;
+
+ while ((*fq = select_queue(fqc)) != NULL) {
+ while ((flow = SIMPLEQ_FIRST(*fq)) != NULL) {
+ if (flow->deficit <= 0) {
+ flow->deficit += fqc->quantum;
+ SIMPLEQ_REMOVE_HEAD(*fq, flowentry);
+ SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow,
+ flowentry);
+ DPRINTF("%s: flow %u deficit %d\n", __func__,
+ flow->id, flow->deficit);
+ } else
+ return (flow);
+ }
+ }
+
+ return (NULL);
+}
+
+static inline struct flow *
+next_flow(struct fqcodel *fqc, struct flow *flow, struct flowq **fq)
+{
+ SIMPLEQ_REMOVE_HEAD(*fq, flowentry);
+
+ if (*fq == &fqc->newq && !SIMPLEQ_EMPTY(&fqc->oldq)) {
+ /* A packet was dropped, starve the queue */
+ SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow, flowentry);
+ DPRINTF("%s: flow %u ->oldq deficit %d\n", __func__,
+ flow->id, flow->deficit);
+ } else {
+ /* A packet was dropped on a starved queue, disable it */
+ flow->active = 0;
+ DPRINTF("%s: flow %u inactive deficit %d\n", __func__,
+ flow->id, flow->deficit);
+ }
+
+ return (first_flow(fqc, fq));
+}
+
+struct mbuf *
+fqcodel_deq_begin(struct ifqueue *ifq, void **cookiep)
+{
+ struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
+ struct ifnet *ifp = ifq->ifq_if;
+ struct fqcodel *fqc = ifq->ifq_q;
+ struct flowq *fq;
+ struct flow *flow;
+ struct mbuf *m;
+ int64_t now;
+
+ if ((fqc->flags & FQCF_FIXED_QUANTUM) == 0)
+ fqc->quantum = ifp->if_mtu + max_linkhdr;
+
+ codel_gettime(&now);
+
+ for (flow = first_flow(fqc, &fq); flow != NULL;
+ flow = next_flow(fqc, flow, &fq)) {
+ m = codel_dequeue(&flow->cd, &fqc->cparams, now, &free_ml,
+ &fqc->drop_cnt.packets, &fqc->drop_cnt.bytes);
+
+ ifq_mfreeml(ifq, &free_ml);
+
+ if (m != NULL) {
+ flow->deficit -= m->m_pkthdr.len;
+ DPRINTF("%s: flow %u deficit %d\n", __func__,
+ flow->id, flow->deficit);
+ *cookiep = flow;
+ return (m);
+ }
+ }
+
+ return (NULL);
+}
+
+void
+fqcodel_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
+{
+ struct fqcodel *fqc = ifq->ifq_q;
+ struct flow *flow = cookie;
+
+ fqc->xmit_cnt.packets++;
+ fqc->xmit_cnt.bytes += m->m_pkthdr.len;
+
+ (void)codel_commit(&flow->cd, m);
+}
+
+void
+fqcodel_purge(struct ifqueue *ifq, struct mbuf_list *ml)
+{
+ struct fqcodel *fqc = ifq->ifq_q;
+ unsigned int i;
+
+ for (i = 0; i < fqc->nflows; i++)
+ codel_purge(&fqc->flows[i].cd, ml);
+}
+
+unsigned int
+fqcodel_idx(unsigned int nqueues, const struct mbuf *m)
+{
+ return (0);
+}
+
+void *
+fqcodel_alloc(unsigned int idx, void *arg)
+{
+ struct fqcodel *fqc = arg;
+
+ SIMPLEQ_INIT(&fqc->newq);
+ SIMPLEQ_INIT(&fqc->oldq);
+
+ return (fqc);
+}
+
+void
+fqcodel_free(unsigned int idx, void *arg)
+{
+ /* nothing to do here */
+}
diff --git a/sys/net/fq_codel.h b/sys/net/fq_codel.h
new file mode 100644
index 00000000000..7d79f5bcc7e
--- /dev/null
+++ b/sys/net/fq_codel.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2017 Mike Belopuhov
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _NET_FQ_CODEL_H_
+#define _NET_FQ_CODEL_H_
+
+/* compatible with hfsc_pktcntr */
+struct fqcodel_pktcntr {
+ uint64_t packets;
+ uint64_t bytes;
+};
+
+struct fqcodel_stats {
+ struct fqcodel_pktcntr xmit_cnt;
+ struct fqcodel_pktcntr drop_cnt;
+
+ uint32_t qlength;
+ uint32_t qlimit;
+
+ uint32_t flows;
+ uint32_t _unused; /* padding */
+
+ uint32_t target;
+ uint32_t interval;
+
+ uint32_t minqlen;
+ uint32_t maxqlen;
+
+ /* the values below are used to calculate standard deviation */
+ uint64_t qlensum; /* sum of queue lenghts */
+ uint64_t qlensumsq; /* sum of squared queue lenghts */
+};
+
+#ifdef _KERNEL
+extern const struct ifq_ops * const ifq_fqcodel_ops;
+#endif /* _KERNEL */
+
+#endif /* _NET_FQ_CODEL_H_ */