diff options
author | David Gwynne <dlg@cvs.openbsd.org> | 2014-07-08 04:02:15 +0000 |
---|---|---|
committer | David Gwynne <dlg@cvs.openbsd.org> | 2014-07-08 04:02:15 +0000 |
commit | f201a4bbc9d2fe8c3664f8b02ebf1ac13ae5b94e (patch) | |
tree | 96a3088aaf44c3c3ca309557b32609cd62e09535 | |
parent | 70577c8ba326731615fbca04fee11092e246d519 (diff) |
introduce the if_rxr api. it is intended to pull the rx ring accounting
out of the mbuf layer, and break the assumption that an interface will
only have a single ring per mbuf cluster size.
mpi@ is ok with moving this forward
-rw-r--r-- | share/man/man9/Makefile | 7 | ||||
-rw-r--r-- | share/man/man9/if_rxr_init.9 | 148 | ||||
-rw-r--r-- | sys/net/if.c | 103 | ||||
-rw-r--r-- | sys/net/if.h | 22 | ||||
-rw-r--r-- | sys/net/if_var.h | 13 | ||||
-rw-r--r-- | sys/sys/sockio.h | 4 |
6 files changed, 291 insertions, 6 deletions
diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index d5e5cdcb5d0..55d7c29f870 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -1,4 +1,4 @@ -# $OpenBSD: Makefile,v 1.210 2014/06/30 21:48:09 matthew Exp $ +# $OpenBSD: Makefile,v 1.211 2014/07/08 04:02:14 dlg Exp $ # $NetBSD: Makefile,v 1.4 1996/01/09 03:23:01 thorpej Exp $ # Makefile for section 9 (kernel function and variable) manual pages. @@ -17,7 +17,7 @@ MAN= aml_evalnode.9 atomic_add_int.9 atomic_cas_uint.9 \ ieee80211.9 ieee80211_crypto.9 ieee80211_input.9 ieee80211_ioctl.9 \ ieee80211_node.9 ieee80211_output.9 ieee80211_proto.9 \ ieee80211_radiotap.9 \ - iic.9 intro.9 inittodr.9 \ + if_rxr_init.9 iic.9 intro.9 inittodr.9 \ kern.9 km_alloc.9 knote.9 kthread.9 ktrace.9 \ loadfirmware.9 lock.9 log.9 \ malloc.9 membar_sync.9 mbuf.9 mbuf_tags.9 md5.9 mi_switch.9 \ @@ -211,6 +211,9 @@ MLINKS+=ieee80211_proto.9 ieee80211_proto_attach.9 \ ieee80211_proto.9 ieee80211_print_essid.9 \ ieee80211_proto.9 ieee80211_dump_pkt.9 \ ieee80211_proto.9 ieee80211_fix_rate.9 +MLINKS+=if_rxr_init.9 if_rxr_get.9 if_rxr_init.9 if_rxr_put.9 \ + if_rxr_init.9 if_rxr_inuse.9 if_rxr_init.9 if_rxr_ioctl.9 \ + if_rxr_init.9 if_rxr_info_ioctl.9 MLINKS+=iic.9 iic_acquire_bus.9 iic.9 iic_release_bus.9 iic.9 iic_exec.9 \ iic.9 iic_smbus_write_byte.9 iic.9 iic_smbus_read_byte.9 \ iic.9 iic_smbus_receive_byte.9 diff --git a/share/man/man9/if_rxr_init.9 b/share/man/man9/if_rxr_init.9 new file mode 100644 index 00000000000..932aa3d6e32 --- /dev/null +++ b/share/man/man9/if_rxr_init.9 @@ -0,0 +1,148 @@ +.\" $OpenBSD: if_rxr_init.9,v 1.1 2014/07/08 04:02:14 dlg Exp $ +.\" +.\" Copyright (c) 2014 David Gwynne <dlg@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. 
+.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: July 8 2014 $ +.Dt IF_RXR_INIT 9 +.Os +.Sh NAME +.Nm if_rxr_init , +.Nm if_rxr_get , +.Nm if_rxr_put , +.Nm if_rxr_inuse +.Nd Interface Receive Ring accounting +.Sh SYNOPSIS +.In net/if.h +.Ft void +.Fn "if_rxr_init" "struct if_rxring *rxr" "unsigned int lwm" "unsigned int hwm" +.Ft unsigned int +.Fn "if_rxr_get" "struct if_rxring *rxr" "unsigned int max" +.Ft void +.Fn "if_rxr_put" "struct if_rxring *rxr" "unsigned int n" +.Ft unsigned int +.Fn "if_rxr_inuse" "struct if_rxring *rxr" +.Ft int +.Fn "if_rxr_ioctl" "struct if_rxrinfo *ifri" "const char *name" "unsigned int size" "struct if_rxring *rxr" +.Ft int +.Fn "if_rxr_info_ioctl" "struct if_rxrinfo *ifri" "unsigned int n" "struct if_rxring_info *rings" +.Sh DESCRIPTION +The Interface Receive Ring accounting API provides a mechanism to +manage the number of available descriptors on a network card's receive +ring. +The API restricts the allocation of receive descriptors using a +heuristic that monitors the use of the ring. +The number of descriptors granted on the ring may increase over time +as the interface proves it uses them. +Additionally, if the algorithm detects that the system is livelocked +as a result of being overwhelmed with network traffic, it will +restrict the number of available receive descriptors. +.Pp +.Fn if_rxr_init +initialises the +.Fa rxr +structure. 
+The +.Fa lwm +argument defines the minimum number of descriptors the chip needs +to operate the ring correctly. +.Fa hwm +is used to describe the maximum number of descriptors the ring can contain. +.Pp +.Fn if_rxr_get +allocates and accounts for up to +.Fa max +descriptors in the ring as being used. +.Pp +.Fn if_rxr_put +returns +.Fa n +receive descriptor slots to the ring. +.Pp +.Fn if_rxr_inuse +can be used to determine how many descriptor slots have been allocated +on the ring. +.Pp +The +.Fn if_rxr_ioctl +and +.Fn if_rxr_info_ioctl +functions are provided to assist drivers in reporting their rings' +state to userland via a +.Dv SIOCGIFRXR +ioctl request. +The ioctl data payload will be an ifreq structure, with ifr_data pointing at a +struct if_rxrinfo in userland memory. +This if_rxrinfo pointer should be passed via +.Fa ifri . +.Pp +If a driver only has a single receive ring, it may pass the ring state to +.Fn if_rxr_ioctl +via the +.Fa rxr +argument. +.Fa size +is used to describe the size of the mbuf cluster the receive ring uses. +If the driver wishes to name the ring it can pass it via +.Fa name , +otherwise +.Dv NULL . +.Pp +If the driver has multiple receive rings, it can prepare an array +of if_rxring_info structures and pass that to +.Fn if_rxr_info_ioctl +via +.Fa rings +with the number of elements in the array passed via +.Fa n . +.Pp +For the heuristic to work correctly, a driver using this API should +return all possible descriptor slots with +.Fn if_rxr_put +before calling +.Fn if_rxr_get +to fill them again. +.Sh CONTEXT +.Fn if_rxr_init , +.Fn if_rxr_get , +.Fn if_rxr_put , +and +.Fn if_rxr_inuse +can be called during autoconf, from process context, or from interrupt context. +.Pp +.Fn if_rxr_ioctl +and +.Fn if_rxr_info_ioctl +can be called from process context, and only from the context of +the process generating an ioctl call. 
+.Pp +It is up to the caller to provide appropriate locking around calls +to these functions to prevent inconsistencies in the relevant +if_rxring data structure. +.Sh RETURN VALUES +.Fn if_rxr_get +returns the number of receive descriptors available on the ring. +The number of descriptors may be less than the +.Fa max +requested. +.Pp +.Fn if_rxr_inuse +returns the number of receive descriptor slots currently in use on the ring. +.Sh SEE ALSO +.Xr autoconf 9 +.Sh HISTORY +The Interface Receive Ring API was originally written by +.An David Gwynne Aq Mt dlg@openbsd.org . +The API first appeared in +.Ox 5.6 . diff --git a/sys/net/if.c b/sys/net/if.c index 3169e06ab61..fd0e8539ca6 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if.c,v 1.292 2014/06/26 13:08:25 mpi Exp $ */ +/* $OpenBSD: if.c,v 1.293 2014/07/08 04:02:14 dlg Exp $ */ /* $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $ */ /* @@ -178,6 +178,8 @@ int if_cloners_count; struct pool ifaddr_item_pl; +int net_livelocked(void); + /* * Network interface utility routines. 
* @@ -2379,3 +2381,102 @@ ifnewlladdr(struct ifnet *ifp) } splx(s); } + +int +net_livelocked() +{ + extern int ticks; + extern int m_clticks; + + return (ticks - m_clticks > 1); +} + +void +if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm) +{ + extern int ticks; + + memset(rxr, 0, sizeof(*rxr)); + + rxr->rxr_adjusted = ticks; + rxr->rxr_cwm = rxr->rxr_lwm = lwm; + rxr->rxr_hwm = hwm; +} + +static inline void +if_rxr_adjust_cwm(struct if_rxring *rxr) +{ + extern int ticks; + + if (net_livelocked()) { + if (rxr->rxr_cwm > rxr->rxr_lwm) + rxr->rxr_cwm--; + else + return; + } else if (rxr->rxr_alive > 4) + return; + else if (rxr->rxr_cwm < rxr->rxr_hwm) + rxr->rxr_cwm++; + + rxr->rxr_adjusted = ticks; +} + +u_int +if_rxr_get(struct if_rxring *rxr, u_int max) +{ + extern int ticks; + u_int diff; + + if (ticks - rxr->rxr_adjusted >= 1) { + /* we're free to try for an adjustment */ + if_rxr_adjust_cwm(rxr); + } + + if (rxr->rxr_alive >= rxr->rxr_cwm) + return (0); + + diff = min(rxr->rxr_cwm - rxr->rxr_alive, max); + rxr->rxr_alive += diff; + + return (diff); +} + +int +if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e) +{ + struct if_rxrinfo kifri; + int error; + u_int n; + + error = copyin(uifri, &kifri, sizeof(kifri)); + if (error) + return (error); + + n = min(t, kifri.ifri_total); + kifri.ifri_total = t; + + if (n > 0) { + error = copyout(e, kifri.ifri_entries, sizeof(*e) * n); + if (error) + return (error); + } + + return (copyout(&kifri, uifri, sizeof(kifri))); +} + +int +if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size, + struct if_rxring *rxr) +{ + struct if_rxring_info ifr; + + memset(&ifr, 0, sizeof(ifr)); + + if (name != NULL) + strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); + + ifr.ifr_size = size; + ifr.ifr_info = *rxr; + + return (if_rxr_info_ioctl(ifri, 1, &ifr)); +} diff --git a/sys/net/if.h b/sys/net/if.h index 696e5a14fa7..23c0bec0d30 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -1,4 +1,4 @@ 
-/* $OpenBSD: if.h,v 1.154 2014/06/13 07:28:12 mpi Exp $ */ +/* $OpenBSD: if.h,v 1.155 2014/07/08 04:02:14 dlg Exp $ */ /* $NetBSD: if.h,v 1.23 1996/05/07 02:40:27 thorpej Exp $ */ /* @@ -79,6 +79,26 @@ struct mclpool { u_short mcl_lwm; }; +struct if_rxring { + int rxr_adjusted; + u_int rxr_alive; + u_int rxr_cwm; + u_int rxr_lwm; + u_int rxr_hwm; +}; + +struct if_rxring_info { + char ifr_name[16]; /* name of the ring */ + u_int ifr_size; /* size of the packets on the ring */ + struct if_rxring ifr_info; +}; + +/* Structure used in SIOCGIFRXR request. */ +struct if_rxrinfo { + u_int ifri_total; + struct if_rxring_info *ifri_entries; +}; + /* * Structure defining statistics and other data kept regarding a network * interface. diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 86ca9dd4d30..cd40d8becdd 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if_var.h,v 1.11 2014/05/26 08:33:48 mpi Exp $ */ +/* $OpenBSD: if_var.h,v 1.12 2014/07/08 04:02:14 dlg Exp $ */ /* $NetBSD: if.h,v 1.23 1996/05/07 02:40:27 thorpej Exp $ */ /* @@ -449,6 +449,17 @@ void ifa_add(struct ifnet *, struct ifaddr *); void ifa_del(struct ifnet *, struct ifaddr *); void ifa_update_broadaddr(struct ifnet *, struct ifaddr *, struct sockaddr *); + +void if_rxr_init(struct if_rxring *, u_int, u_int); +u_int if_rxr_get(struct if_rxring *, u_int); + +#define if_rxr_put(_r, _c) do { (_r)->rxr_alive -= (_c); } while (0) +#define if_rxr_inuse(_r) ((_r)->rxr_alive) + +int if_rxr_info_ioctl(struct if_rxrinfo *, u_int, struct if_rxring_info *); +int if_rxr_ioctl(struct if_rxrinfo *, const char *, u_int, + struct if_rxring *); + #endif /* _KERNEL */ #endif /* _NET_IF_VAR_H_ */ diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h index aa220bb6d69..3b96f210d57 100644 --- a/sys/sys/sockio.h +++ b/sys/sys/sockio.h @@ -1,4 +1,4 @@ -/* $OpenBSD: sockio.h,v 1.53 2013/10/13 10:10:04 reyk Exp $ */ +/* $OpenBSD: sockio.h,v 1.54 2014/07/08 04:02:14 dlg Exp $ */ /* $NetBSD: sockio.h,v 
1.5 1995/08/23 00:40:47 thorpej Exp $ */ /*- @@ -199,4 +199,6 @@ #define SIOCSETPFLOW _IOW('i', 253, struct ifreq) #define SIOCGETPFLOW _IOWR('i', 254, struct ifreq) +#define SIOCGIFRXR _IOW('i', 170, struct ifreq) + #endif /* !_SYS_SOCKIO_H_ */ |