summary refs log tree commit diff
path: root/sys
diff options
context:
space:
mode:
author David Gwynne <dlg@cvs.openbsd.org> 2019-03-01 04:47:34 +0000
committer David Gwynne <dlg@cvs.openbsd.org> 2019-03-01 04:47:34 +0000
commit d29a5aa65bfd884803a1eb5820f990d919696aa4 (patch)
tree 0e10b6d89ffbde9050d79e56c155936c59a8ed9e /sys
parent 43f1cf5d7d05c478c6845bbe4a02d5584230ac7e (diff)
rework how ifiq_input decides the stack is busy and whether it should drop
previously ifiq_input used the traditional backpressure or defense mechanism and counted packets to decide when to shed load by dropping. it ended up waiting for 10240 packets to get queued on the stack before it would decide to drop packets. this may be ok for some machines, but for a lot of them this was too much. this diff reworks how ifiqs measure how busy the stack is by introducing an ifiq_pressure counter that is incremented when ifiq_input is called, and cleared when ifiq_process calls the network stack to process the queue. if ifiq_input is called multiple times before ifiq_process in a net taskq runs, ifiq_pressure goes up, and ifiq_input uses a high ifiq_pressure value to decide the stack is busy and it should drop. i was hoping there would be no performance impact from this change, but hrvoje popovski notes a slight bump in forwarding performance. my own testing shows that the ifiq input list length grows to a fraction of the 10240 it used to get to, which means the maximum burst of packets through the stack is smoothed out a bit. instead of big lists of packets followed by big periods of drops, we get relatively small bursts of packets with smaller gaps where we drop. the follow-on from this is to make drivers implementing rx ring moderation use the return value of ifiq_input to scale the ring allocation down, allowing the hardware to drop packets so software doesn't have to.
Diffstat (limited to 'sys')
-rw-r--r-- sys/net/if.c 4
-rw-r--r-- sys/net/ifq.c 20
-rw-r--r-- sys/net/ifq.h 6
3 files changed, 17 insertions, 13 deletions
diff --git a/sys/net/if.c b/sys/net/if.c
index 9548c21293c..dadafb649eb 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: if.c,v 1.572 2019/02/26 03:20:08 dlg Exp $ */
+/* $OpenBSD: if.c,v 1.573 2019/03/01 04:47:32 dlg Exp $ */
/* $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $ */
/*
@@ -738,7 +738,7 @@ if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
- ifiq_input(&ifp->if_rcv, ml, 2048);
+ ifiq_input(&ifp->if_rcv, ml);
}
int
diff --git a/sys/net/ifq.c b/sys/net/ifq.c
index 418f5f7c32b..d6b6e3032b6 100644
--- a/sys/net/ifq.c
+++ b/sys/net/ifq.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ifq.c,v 1.25 2018/12/16 03:36:02 dlg Exp $ */
+/* $OpenBSD: ifq.c,v 1.26 2019/03/01 04:47:33 dlg Exp $ */
/*
* Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
@@ -445,6 +445,7 @@ ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
mtx_init(&ifiq->ifiq_mtx, IPL_NET);
ml_init(&ifiq->ifiq_ml);
task_set(&ifiq->ifiq_task, ifiq_process, ifiq);
+ ifiq->ifiq_pressure = 0;
ifiq->ifiq_qdrops = 0;
ifiq->ifiq_packets = 0;
@@ -467,17 +468,20 @@ ifiq_destroy(struct ifiqueue *ifiq)
ml_purge(&ifiq->ifiq_ml);
}
+unsigned int ifiq_pressure_drop = 16;
+unsigned int ifiq_pressure_return = 2;
+
int
-ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml, unsigned int cwm)
+ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml)
{
struct ifnet *ifp = ifiq->ifiq_if;
struct mbuf *m;
uint64_t packets;
uint64_t bytes = 0;
+ unsigned int pressure;
#if NBPFILTER > 0
caddr_t if_bpf;
#endif
- int rv = 1;
if (ml_empty(ml))
return (0);
@@ -518,12 +522,11 @@ ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml, unsigned int cwm)
ifiq->ifiq_packets += packets;
ifiq->ifiq_bytes += bytes;
- if (ifiq_len(ifiq) >= cwm * 5)
+ pressure = ++ifiq->ifiq_pressure;
+ if (pressure > ifiq_pressure_drop)
ifiq->ifiq_qdrops += ml_len(ml);
- else {
- rv = (ifiq_len(ifiq) >= cwm * 3);
+ else
ml_enlist(&ifiq->ifiq_ml, ml);
- }
mtx_leave(&ifiq->ifiq_mtx);
if (ml_empty(ml))
@@ -531,7 +534,7 @@ ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml, unsigned int cwm)
else
ml_purge(ml);
- return (rv);
+ return (pressure > ifiq_pressure_return);
}
void
@@ -573,6 +576,7 @@ ifiq_process(void *arg)
return;
mtx_enter(&ifiq->ifiq_mtx);
+ ifiq->ifiq_pressure = 0;
ml = ifiq->ifiq_ml;
ml_init(&ifiq->ifiq_ml);
mtx_leave(&ifiq->ifiq_mtx);
diff --git a/sys/net/ifq.h b/sys/net/ifq.h
index 6beb428b176..fdbfc170314 100644
--- a/sys/net/ifq.h
+++ b/sys/net/ifq.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ifq.h,v 1.22 2018/12/11 01:36:42 dlg Exp $ */
+/* $OpenBSD: ifq.h,v 1.23 2019/03/01 04:47:33 dlg Exp $ */
/*
* Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
@@ -80,6 +80,7 @@ struct ifiqueue {
struct mutex ifiq_mtx;
struct mbuf_list ifiq_ml;
struct task ifiq_task;
+ unsigned int ifiq_pressure;
/* counters */
uint64_t ifiq_packets;
@@ -473,8 +474,7 @@ ifq_idx(struct ifqueue *ifq, unsigned int nifqs, const struct mbuf *m)
void ifiq_init(struct ifiqueue *, struct ifnet *, unsigned int);
void ifiq_destroy(struct ifiqueue *);
-int ifiq_input(struct ifiqueue *, struct mbuf_list *,
- unsigned int);
+int ifiq_input(struct ifiqueue *, struct mbuf_list *);
int ifiq_enqueue(struct ifiqueue *, struct mbuf *);
void ifiq_add_data(struct ifiqueue *, struct if_data *);
void ifiq_barrier(struct ifiqueue *);