summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Gwynne <dlg@cvs.openbsd.org>2014-07-08 04:02:15 +0000
committerDavid Gwynne <dlg@cvs.openbsd.org>2014-07-08 04:02:15 +0000
commitf201a4bbc9d2fe8c3664f8b02ebf1ac13ae5b94e (patch)
tree96a3088aaf44c3c3ca309557b32609cd62e09535
parent70577c8ba326731615fbca04fee11092e246d519 (diff)
introduce the if_rxr api. it is intended to pull the rx ring accounting
out of the mbuf layer, and break the assumption that an interface will only have a single ring per mbuf cluster size. mpi@ is ok with moving this forward
-rw-r--r--share/man/man9/Makefile7
-rw-r--r--share/man/man9/if_rxr_init.9148
-rw-r--r--sys/net/if.c103
-rw-r--r--sys/net/if.h22
-rw-r--r--sys/net/if_var.h13
-rw-r--r--sys/sys/sockio.h4
6 files changed, 291 insertions, 6 deletions
diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile
index d5e5cdcb5d0..55d7c29f870 100644
--- a/share/man/man9/Makefile
+++ b/share/man/man9/Makefile
@@ -1,4 +1,4 @@
-# $OpenBSD: Makefile,v 1.210 2014/06/30 21:48:09 matthew Exp $
+# $OpenBSD: Makefile,v 1.211 2014/07/08 04:02:14 dlg Exp $
# $NetBSD: Makefile,v 1.4 1996/01/09 03:23:01 thorpej Exp $
# Makefile for section 9 (kernel function and variable) manual pages.
@@ -17,7 +17,7 @@ MAN= aml_evalnode.9 atomic_add_int.9 atomic_cas_uint.9 \
ieee80211.9 ieee80211_crypto.9 ieee80211_input.9 ieee80211_ioctl.9 \
ieee80211_node.9 ieee80211_output.9 ieee80211_proto.9 \
ieee80211_radiotap.9 \
- iic.9 intro.9 inittodr.9 \
+ if_rxr_init.9 iic.9 intro.9 inittodr.9 \
kern.9 km_alloc.9 knote.9 kthread.9 ktrace.9 \
loadfirmware.9 lock.9 log.9 \
malloc.9 membar_sync.9 mbuf.9 mbuf_tags.9 md5.9 mi_switch.9 \
@@ -211,6 +211,9 @@ MLINKS+=ieee80211_proto.9 ieee80211_proto_attach.9 \
ieee80211_proto.9 ieee80211_print_essid.9 \
ieee80211_proto.9 ieee80211_dump_pkt.9 \
ieee80211_proto.9 ieee80211_fix_rate.9
+MLINKS+=if_rxr_init.9 if_rxr_get.9 if_rxr_init.9 if_rxr_put.9 \
+ if_rxr_init.9 if_rxr_inuse.9 if_rxr_init.9 if_rxr_ioctl.9 \
+ if_rxr_init.9 if_rxr_info_ioctl.9
MLINKS+=iic.9 iic_acquire_bus.9 iic.9 iic_release_bus.9 iic.9 iic_exec.9 \
iic.9 iic_smbus_write_byte.9 iic.9 iic_smbus_read_byte.9 \
iic.9 iic_smbus_receive_byte.9
diff --git a/share/man/man9/if_rxr_init.9 b/share/man/man9/if_rxr_init.9
new file mode 100644
index 00000000000..932aa3d6e32
--- /dev/null
+++ b/share/man/man9/if_rxr_init.9
@@ -0,0 +1,148 @@
+.\" $OpenBSD: if_rxr_init.9,v 1.1 2014/07/08 04:02:14 dlg Exp $
+.\"
+.\" Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: July 8 2014 $
+.Dt IF_RXR_INIT 9
+.Os
+.Sh NAME
+.Nm if_rxr_init ,
+.Nm if_rxr_get ,
+.Nm if_rxr_put ,
+.Nm if_rxr_inuse
+.Nd Interface Receive Ring accounting
+.Sh SYNOPSIS
+.In net/if.h
+.Ft void
+.Fn "if_rxr_init" "struct if_rxring *rxr" "unsigned int lwm" "unsigned int hwm"
+.Ft unsigned int
+.Fn "if_rxr_get" "struct if_rxring *rxr" "unsigned int max"
+.Ft void
+.Fn "if_rxr_put" "struct if_rxring *rxr" "unsigned int n"
+.Ft unsigned int
+.Fn "if_rxr_inuse" "struct if_rxring *rxr"
+.Ft int
+.Fn "if_rxr_ioctl" "struct if_rxrinfo *ifri" "const char *name" "unsigned int size" "struct if_rxring *rxr"
+.Ft int
+.Fn "if_rxr_info_ioctl" "struct if_rxrinfo *ifri" "unsigned int n" "struct if_rxring_info *rings"
+.Sh DESCRIPTION
+The Interface Receive Ring accounting API provides a mechanism to
+manage the number of available descriptors on a network card's receive
+ring.
+The API restricts the allocation of receive descriptors using a
+heuristic that monitors the use of the ring.
+The number of descriptors granted on the ring may increase over time
+as the interface proves it uses them.
+Additionally, if the algorithm detects that the system is livelocked
+as a result of being overwhelmed with network traffic, it will
+restrict the number of available receive descriptors.
+.Pp
+.Fn if_rxr_init
+initialises the
+.Fa rxr
+structure.
+The
+.Fa lwm
+argument defines the minimum number of descriptors the chip needs
+to operate the ring correctly.
+.Fa hwm
+is used to describe the maximum number of descriptors the ring can contain.
+.Pp
+.Fn if_rxr_get
+allocates and accounts for up to
+.Fa max
+descriptors in the ring as being used.
+.Pp
+.Fn if_rxr_put
+returns
+.Fa n
+receive descriptor slots to the ring.
+.Pp
+.Fn if_rxr_inuse
+can be used to determine how many descriptor slots have been allocated
+on the ring.
+.Pp
+The
+.Fn if_rxr_ioctl
+and
+.Fn if_rxr_info_ioctl
+functions are provided to assist drivers in reporting their rings'
+state to userland via a
+.Dv SIOCGIFRXR
+ioctl request.
+The ioctl data payload will be an ifreq structure, with ifr_data pointing at a
+struct if_rxrinfo in userland memory.
+This if_rxrinfo pointer should be passed via
+.Fa ifri .
+.Pp
+If a driver only has a single receive ring, it may pass the ring state to
+.Fn if_rxr_ioctl
+via the
+.Fa rxr
+argument.
+.Fa size
+is used to describe the size of the mbuf cluster the receive ring uses.
+If the driver wishes to name the ring it can pass it via
+.Fa name ,
+otherwise
+.Dv NULL .
+.Pp
+If the driver has multiple receive rings, it can prepare an array
+of if_rxring_info structures and pass that to
+.Fn if_rxr_info_ioctl
+via
+.Fa rings
+with the number of elements in the array passed via
+.Fa n .
+.Pp
+For the heuristic to work correctly, a driver using this API should
+return all possible descriptor slots with
+.Fn if_rxr_put
+before calling
+.Fn if_rxr_get
+to fill them again.
+.Sh CONTEXT
+.Fn if_rxr_init ,
+.Fn if_rxr_get ,
+.Fn if_rxr_put ,
+and
+.Fn if_rxr_inuse
+can be called during autoconf, from process context, or from interrupt context.
+.Pp
+.Fn if_rxr_ioctl
+and
+.Fn if_rxr_info_ioctl
+can be called from process context, and only from the context of
+the process generating an ioctl call.
+.Pp
+It is up to the caller to provide appropriate locking around calls
+to these functions to prevent inconsistencies in the relevant
+if_rxring data structure.
+.Sh RETURN VALUES
+.Fn if_rxr_get
+returns the number of receive descriptors available on the ring.
+The number of descriptors may be less than the
+.Fa max
+requested.
+.Pp
+.Fn if_rxr_inuse
+returns the number of receive descriptor slots currently in use on the ring.
+.Sh SEE ALSO
+.Xr autoconf 9
+.Sh HISTORY
+The Interface Receive Ring API was originally written by
+.An David Gwynne Aq Mt dlg@openbsd.org .
+The API first appeared in
+.Ox 5.6 .
diff --git a/sys/net/if.c b/sys/net/if.c
index 3169e06ab61..fd0e8539ca6 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: if.c,v 1.292 2014/06/26 13:08:25 mpi Exp $ */
+/* $OpenBSD: if.c,v 1.293 2014/07/08 04:02:14 dlg Exp $ */
/* $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $ */
/*
@@ -178,6 +178,8 @@ int if_cloners_count;
struct pool ifaddr_item_pl;
+int net_livelocked(void);
+
/*
* Network interface utility routines.
*
@@ -2379,3 +2381,102 @@ ifnewlladdr(struct ifnet *ifp)
}
splx(s);
}
+/*
+ * Livelock test used by the receive ring accounting in this file.
+ * Returns non-zero when ticks has advanced more than one tick past
+ * m_clticks, and zero otherwise. Both counters are only declared
+ * extern here; how m_clticks is maintained is not visible in this
+ * change.
+ */
+int
+net_livelocked()
+{
+ extern int ticks;
+ extern int m_clticks;
+
+ return (ticks - m_clticks > 1);
+}
+/*
+ * Initialise the accounting state in rxr.
+ * The structure is zeroed, so rxr_alive starts at 0. The current
+ * watermark (rxr_cwm) starts at the low watermark lwm, the high
+ * watermark is set to hwm, and rxr_adjusted records the current value
+ * of ticks.
+ */
+void
+if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
+{
+ extern int ticks;
+
+ memset(rxr, 0, sizeof(*rxr));
+
+ rxr->rxr_adjusted = ticks;
+ rxr->rxr_cwm = rxr->rxr_lwm = lwm;
+ rxr->rxr_hwm = hwm;
+}
+/*
+ * Move the current watermark (rxr_cwm) by at most one step.
+ * While net_livelocked() reports a livelock, rxr_cwm is lowered by one
+ * but never below rxr_lwm. Otherwise, provided no more than 4 slots
+ * are still accounted as in use, rxr_cwm is raised by one but never
+ * above rxr_hwm. rxr_adjusted is stamped with the current ticks value
+ * unless one of the two early returns is taken.
+ */
+static inline void
+if_rxr_adjust_cwm(struct if_rxring *rxr)
+{
+ extern int ticks;
+
+ if (net_livelocked()) {
+ if (rxr->rxr_cwm > rxr->rxr_lwm)
+ rxr->rxr_cwm--;
+ else
+ return; /* already at the floor; leave rxr_adjusted alone */
+ } else if (rxr->rxr_alive > 4)
+ return; /* more than 4 slots still in use; do not grow */
+ else if (rxr->rxr_cwm < rxr->rxr_hwm)
+ rxr->rxr_cwm++;
+
+ rxr->rxr_adjusted = ticks;
+}
+/*
+ * Account for up to max more descriptor slots on the ring and return
+ * how many were granted.
+ * If at least one tick has passed since rxr_adjusted, the current
+ * watermark is reconsidered first. Returns 0 when rxr_alive has
+ * already reached rxr_cwm. Otherwise the grant is the smaller of max
+ * and the room left below rxr_cwm, and it is added to rxr_alive.
+ */
+u_int
+if_rxr_get(struct if_rxring *rxr, u_int max)
+{
+ extern int ticks;
+ u_int diff;
+
+ if (ticks - rxr->rxr_adjusted >= 1) {
+ /* we're free to try for an adjustment */
+ if_rxr_adjust_cwm(rxr);
+ }
+
+ if (rxr->rxr_alive >= rxr->rxr_cwm)
+ return (0);
+
+ diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
+ rxr->rxr_alive += diff;
+
+ return (diff);
+}
+/*
+ * Report t ring descriptions from the array e to the caller's
+ * struct if_rxrinfo at uifri.
+ * uifri is accessed with copyin/copyout. The caller's ifri_total is
+ * used as an upper bound: at most min(t, ifri_total) elements of e are
+ * copied out to ifri_entries. The header is then copied back with
+ * ifri_total set to t, so the caller can see how many entries exist
+ * even when fewer were copied.
+ * Returns 0 on success, or the error from copyin/copyout.
+ */
+int
+if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
+{
+ struct if_rxrinfo kifri;
+ int error;
+ u_int n;
+
+ error = copyin(uifri, &kifri, sizeof(kifri));
+ if (error)
+ return (error);
+
+ n = min(t, kifri.ifri_total);
+ kifri.ifri_total = t;
+
+ if (n > 0) {
+ error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
+ if (error)
+ return (error);
+ }
+
+ return (copyout(&kifri, uifri, sizeof(kifri)));
+}
+/*
+ * Single-ring wrapper around if_rxr_info_ioctl().
+ * Builds one zeroed if_rxring_info containing the optional name, the
+ * given size and a copy of *rxr, then reports it as a one element
+ * array. A NULL name leaves ifr_name empty; strlcpy bounds the copy to
+ * sizeof(ifr_name).
+ * Returns whatever if_rxr_info_ioctl() returns.
+ */
+int
+if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
+ struct if_rxring *rxr)
+{
+ struct if_rxring_info ifr;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ if (name != NULL)
+ strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+
+ ifr.ifr_size = size;
+ ifr.ifr_info = *rxr;
+
+ return (if_rxr_info_ioctl(ifri, 1, &ifr));
+}
diff --git a/sys/net/if.h b/sys/net/if.h
index 696e5a14fa7..23c0bec0d30 100644
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: if.h,v 1.154 2014/06/13 07:28:12 mpi Exp $ */
+/* $OpenBSD: if.h,v 1.155 2014/07/08 04:02:14 dlg Exp $ */
/* $NetBSD: if.h,v 1.23 1996/05/07 02:40:27 thorpej Exp $ */
/*
@@ -79,6 +79,26 @@ struct mclpool {
u_short mcl_lwm;
};
+struct if_rxring {
+ int rxr_adjusted; /* ticks value at init or at the last watermark adjustment */
+ u_int rxr_alive; /* slots currently accounted as in use */
+ u_int rxr_cwm; /* current watermark: upper limit for rxr_alive */
+ u_int rxr_lwm; /* low watermark: rxr_cwm is not lowered below this */
+ u_int rxr_hwm; /* high watermark: rxr_cwm is not raised above this */
+};
+
+struct if_rxring_info {
+ char ifr_name[16]; /* name of the ring */
+ u_int ifr_size; /* size of the packets on the ring */
+ struct if_rxring ifr_info; /* copy of the ring's accounting state */
+};
+
+/* Structure used in SIOCGIFRXR request. */
+struct if_rxrinfo {
+ u_int ifri_total; /* in: limit on entries copied out; out: number of entries the driver has */
+ struct if_rxring_info *ifri_entries; /* destination array for the copied-out entries */
+};
+
/*
* Structure defining statistics and other data kept regarding a network
* interface.
diff --git a/sys/net/if_var.h b/sys/net/if_var.h
index 86ca9dd4d30..cd40d8becdd 100644
--- a/sys/net/if_var.h
+++ b/sys/net/if_var.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: if_var.h,v 1.11 2014/05/26 08:33:48 mpi Exp $ */
+/* $OpenBSD: if_var.h,v 1.12 2014/07/08 04:02:14 dlg Exp $ */
/* $NetBSD: if.h,v 1.23 1996/05/07 02:40:27 thorpej Exp $ */
/*
@@ -449,6 +449,17 @@ void ifa_add(struct ifnet *, struct ifaddr *);
void ifa_del(struct ifnet *, struct ifaddr *);
void ifa_update_broadaddr(struct ifnet *, struct ifaddr *,
struct sockaddr *);
+/* Receive ring accounting; the functions are implemented in if.c. */
+void if_rxr_init(struct if_rxring *, u_int, u_int);
+u_int if_rxr_get(struct if_rxring *, u_int);
+
+#define if_rxr_put(_r, _c) do { (_r)->rxr_alive -= (_c); } while (0) /* give _c slots back; no check that _c <= rxr_alive */
+#define if_rxr_inuse(_r) ((_r)->rxr_alive) /* slots currently accounted as in use */
+
+int if_rxr_info_ioctl(struct if_rxrinfo *, u_int, struct if_rxring_info *);
+int if_rxr_ioctl(struct if_rxrinfo *, const char *, u_int,
+ struct if_rxring *);
+
#endif /* _KERNEL */
#endif /* _NET_IF_VAR_H_ */
diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h
index aa220bb6d69..3b96f210d57 100644
--- a/sys/sys/sockio.h
+++ b/sys/sys/sockio.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: sockio.h,v 1.53 2013/10/13 10:10:04 reyk Exp $ */
+/* $OpenBSD: sockio.h,v 1.54 2014/07/08 04:02:14 dlg Exp $ */
/* $NetBSD: sockio.h,v 1.5 1995/08/23 00:40:47 thorpej Exp $ */
/*-
@@ -199,4 +199,6 @@
#define SIOCSETPFLOW _IOW('i', 253, struct ifreq)
#define SIOCGETPFLOW _IOWR('i', 254, struct ifreq)
+#define SIOCGIFRXR _IOW('i', 170, struct ifreq) /* get receive ring info; ifr_data points at a struct if_rxrinfo */
+
#endif /* !_SYS_SOCKIO_H_ */