diff options
author | David Gwynne <dlg@cvs.openbsd.org> | 2019-03-01 04:47:34 +0000 |
---|---|---|
committer | David Gwynne <dlg@cvs.openbsd.org> | 2019-03-01 04:47:34 +0000 |
commit | d29a5aa65bfd884803a1eb5820f990d919696aa4 (patch) | |
tree | 0e10b6d89ffbde9050d79e56c155936c59a8ed9e /sys | |
parent | 43f1cf5d7d05c478c6845bbe4a02d5584230ac7e (diff) |
rework how ifiq_input decides the stack is busy and whether it should drop
previously ifiq_input used the traditional backpressure or defense
mechanism and counted packets to decide when to shed load by dropping.
it ended up waiting for 10240 packets to get queued on the
stack before it would decide to drop packets. this may be ok for
some machines, but for a lot of them this was too much.
this diff reworks how ifiqs measure how busy the stack is by
introducing an ifiq_pressure counter that is incremented when
ifiq_input is called, and cleared when ifiq_process calls the network
stack to process the queue. if ifiq_input is called multiple times
before ifiq_process in a net taskq runs, ifiq_pressure goes up, and
ifiq_input uses a high value to decide the stack is busy and it
should drop.
i was hoping there would be no performance impact from this change,
but hrvoje popovski notes a slight bump in forwarding performance.
my own testing shows that the ifiq input list length grows to a
fraction of the 10240 it used to get to, which means the maximum
burst of packets through the stack is smoothed out a bit. instead
of big lists of packets followed by big periods of drops, we get
relatively small bursts of packets with smaller gaps where we drop.
the follow-on from this is to make drivers implementing rx ring
moderation use the return value of ifiq_input to scale the ring
allocation down, allowing the hardware to drop packets so software
doesn't have to.
Diffstat (limited to 'sys')
-rw-r--r-- | sys/net/if.c | 4 | ||||
-rw-r--r-- | sys/net/ifq.c | 20 | ||||
-rw-r--r-- | sys/net/ifq.h | 6 |
3 files changed, 17 insertions, 13 deletions
diff --git a/sys/net/if.c b/sys/net/if.c index 9548c21293c..dadafb649eb 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if.c,v 1.572 2019/02/26 03:20:08 dlg Exp $ */ +/* $OpenBSD: if.c,v 1.573 2019/03/01 04:47:32 dlg Exp $ */ /* $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $ */ /* @@ -738,7 +738,7 @@ if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m) void if_input(struct ifnet *ifp, struct mbuf_list *ml) { - ifiq_input(&ifp->if_rcv, ml, 2048); + ifiq_input(&ifp->if_rcv, ml); } int diff --git a/sys/net/ifq.c b/sys/net/ifq.c index 418f5f7c32b..d6b6e3032b6 100644 --- a/sys/net/ifq.c +++ b/sys/net/ifq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ifq.c,v 1.25 2018/12/16 03:36:02 dlg Exp $ */ +/* $OpenBSD: ifq.c,v 1.26 2019/03/01 04:47:33 dlg Exp $ */ /* * Copyright (c) 2015 David Gwynne <dlg@openbsd.org> @@ -445,6 +445,7 @@ ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx) mtx_init(&ifiq->ifiq_mtx, IPL_NET); ml_init(&ifiq->ifiq_ml); task_set(&ifiq->ifiq_task, ifiq_process, ifiq); + ifiq->ifiq_pressure = 0; ifiq->ifiq_qdrops = 0; ifiq->ifiq_packets = 0; @@ -467,17 +468,20 @@ ifiq_destroy(struct ifiqueue *ifiq) ml_purge(&ifiq->ifiq_ml); } +unsigned int ifiq_pressure_drop = 16; +unsigned int ifiq_pressure_return = 2; + int -ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml, unsigned int cwm) +ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml) { struct ifnet *ifp = ifiq->ifiq_if; struct mbuf *m; uint64_t packets; uint64_t bytes = 0; + unsigned int pressure; #if NBPFILTER > 0 caddr_t if_bpf; #endif - int rv = 1; if (ml_empty(ml)) return (0); @@ -518,12 +522,11 @@ ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml, unsigned int cwm) ifiq->ifiq_packets += packets; ifiq->ifiq_bytes += bytes; - if (ifiq_len(ifiq) >= cwm * 5) + pressure = ++ifiq->ifiq_pressure; + if (pressure > ifiq_pressure_drop) ifiq->ifiq_qdrops += ml_len(ml); - else { - rv = (ifiq_len(ifiq) >= cwm * 3); + else ml_enlist(&ifiq->ifiq_ml, ml); - } 
mtx_leave(&ifiq->ifiq_mtx); if (ml_empty(ml)) @@ -531,7 +534,7 @@ ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml, unsigned int cwm) else ml_purge(ml); - return (rv); + return (pressure > ifiq_pressure_return); } void @@ -573,6 +576,7 @@ ifiq_process(void *arg) return; mtx_enter(&ifiq->ifiq_mtx); + ifiq->ifiq_pressure = 0; ml = ifiq->ifiq_ml; ml_init(&ifiq->ifiq_ml); mtx_leave(&ifiq->ifiq_mtx); diff --git a/sys/net/ifq.h b/sys/net/ifq.h index 6beb428b176..fdbfc170314 100644 --- a/sys/net/ifq.h +++ b/sys/net/ifq.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ifq.h,v 1.22 2018/12/11 01:36:42 dlg Exp $ */ +/* $OpenBSD: ifq.h,v 1.23 2019/03/01 04:47:33 dlg Exp $ */ /* * Copyright (c) 2015 David Gwynne <dlg@openbsd.org> @@ -80,6 +80,7 @@ struct ifiqueue { struct mutex ifiq_mtx; struct mbuf_list ifiq_ml; struct task ifiq_task; + unsigned int ifiq_pressure; /* counters */ uint64_t ifiq_packets; @@ -473,8 +474,7 @@ ifq_idx(struct ifqueue *ifq, unsigned int nifqs, const struct mbuf *m) void ifiq_init(struct ifiqueue *, struct ifnet *, unsigned int); void ifiq_destroy(struct ifiqueue *); -int ifiq_input(struct ifiqueue *, struct mbuf_list *, - unsigned int); +int ifiq_input(struct ifiqueue *, struct mbuf_list *); int ifiq_enqueue(struct ifiqueue *, struct mbuf *); void ifiq_add_data(struct ifiqueue *, struct if_data *); void ifiq_barrier(struct ifiqueue *); |