/*	$OpenBSD: ifq.h,v 1.32 2020/07/07 00:00:03 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#ifndef _NET_IFQ_H_
#define _NET_IFQ_H_

struct ifnet;
struct kstat;

struct ifq_ops;

/*
 * Interface send queue.  It sits between the network stack and a
 * driver's transmission of packets; an interface with several transmit
 * rings has one ifqueue per ring.  The API is described in the
 * "Interface Send Queues" comment further down in this file.
 */
struct ifqueue {
	struct ifnet		*ifq_if;
	struct taskq		*ifq_softnet;
	union {
		void			*_ifq_softc;
		/*
		 * a ring's sndq is found by looking up an array of pointers.
		 * by default we only have one sndq and the default drivers
		 * don't use ifq_softc, so we can borrow it for the map until
		 * we need to allocate a proper map.
		 */
		struct ifqueue		*_ifq_ifqs[1];
	} _ifq_ptr;
#define ifq_softc		 _ifq_ptr._ifq_softc
#define ifq_ifqs		 _ifq_ptr._ifq_ifqs

	/* mbuf handling */
	struct mutex		 ifq_mtx;
	const struct ifq_ops	*ifq_ops;	/* current traffic conditioner */
	void			*ifq_q;
	struct mbuf_list	 ifq_free;
	unsigned int		 ifq_len;	/* read via ifq_len() */
	unsigned int		 ifq_oactive;	/* see ifq_*_oactive() */

	/* statistics */
	uint64_t		 ifq_packets;
	uint64_t		 ifq_bytes;
	uint64_t		 ifq_qdrops;
	uint64_t		 ifq_errors;
	uint64_t		 ifq_mcasts;

	struct kstat		*ifq_kstat;

	/* work serialisation */
	struct mutex		 ifq_task_mtx;
	struct task_list	 ifq_task_list;
	void			*ifq_serializer;
	struct task		 ifq_bundle;

	/* work to be serialised */
	struct task		 ifq_start;
	struct task		 ifq_restart;	/* dispatched by ifq_restart() */

	/* properties */
	unsigned int		 ifq_maxlen;	/* set via ifq_set_maxlen() */
	unsigned int		 ifq_idx;
};

/*
 * Queue state used by the ifiq_*() functions declared at the end of
 * this file.
 * NOTE(review): presumably the receive-side counterpart of struct
 * ifqueue -- confirm against the implementation.
 */
struct ifiqueue {
	struct ifnet		*ifiq_if;
	struct taskq		*ifiq_softnet;
	union {
		void			*_ifiq_softc;
		struct ifiqueue		*_ifiq_ifiqs[1];
	} _ifiq_ptr;
#define ifiq_softc		 _ifiq_ptr._ifiq_softc
#define ifiq_ifiqs		 _ifiq_ptr._ifiq_ifiqs

	struct mutex		 ifiq_mtx;
	struct mbuf_list	 ifiq_ml;	/* backs ifiq_len()/ifiq_empty() */
	struct task		 ifiq_task;
	unsigned int		 ifiq_pressure;

	/* counters */
	uint64_t		 ifiq_packets;
	uint64_t		 ifiq_bytes;
	uint64_t		 ifiq_qdrops;
	uint64_t		 ifiq_errors;
	uint64_t		 ifiq_mcasts;
	uint64_t		 ifiq_noproto;

	struct kstat		*ifiq_kstat;

	/* properties */
	unsigned int		 ifiq_idx;
};

#ifdef _KERNEL

#define IFQ_MAXLEN	256

/*
 *
 * Interface Send Queues
 *
 * struct ifqueue sits between the network stack and a driver's
 * transmission of packets. The high level view is that when the stack
 * has finished generating a packet it hands it to a driver for
 * transmission. It does this by queueing the packet on an ifqueue and
 * notifying the driver to start transmission of the queued packets.
 *
 * A network device may have multiple contexts for the transmission
 * of packets, ie, independent transmit rings. Such a network device,
 * represented by a struct ifnet, would then have multiple ifqueue
 * structures, each of which maps to an independent transmit ring.
*
 * struct ifqueue also provides the point where conditioning of
 * traffic (ie, priq and hfsc) is implemented, and provides some
 * infrastructure to assist in the implementation of network drivers.
 *
 * = ifq API
 *
 * The ifq API provides functions for three distinct consumers:
 *
 *  1. The network stack
 *  2. Traffic QoS/conditioning implementations
 *  3. Network drivers
 *
 * == Network Stack API
 *
 * The network stack is responsible for initialising and destroying
 * the ifqueue structures, changing the traffic conditioner on an
 * interface, enqueuing packets for transmission, and notifying
 * the driver to start transmission of a particular ifqueue.
 *
 * === ifq_init()
 *
 * During if_attach(), the network stack calls ifq_init() to initialise
 * the ifqueue structure. By default it configures the priq traffic
 * conditioner.
 *
 * === ifq_destroy()
 *
 * The network stack calls ifq_destroy() during if_detach() to tear down
 * the ifqueue structure. It frees the traffic conditioner state, and
 * frees any mbufs that were left queued.
 *
 * === ifq_attach()
 *
 * ifq_attach() is used to replace the current traffic conditioner on
 * the ifqueue. All the pending mbufs are removed from the previous
 * conditioner and requeued on the new one.
 *
 * === ifq_idx()
 *
 * ifq_idx() selects a specific ifqueue from the current ifnet
 * structure for use in the transmission of the mbuf.
 *
 * === ifq_enqueue()
 *
 * ifq_enqueue() attempts to fit an mbuf onto the ifqueue. The
 * current traffic conditioner may drop a packet to make space on the
 * queue.
 *
 * === ifq_start()
 *
 * Once a packet has been successfully queued with ifq_enqueue(),
 * the network card is notified with a call to ifq_start().
 * Calls to ifq_start() run in the ifqueue serialisation context,
 * guaranteeing that only one instance of ifp->if_qstart() will be
 * running on behalf of a specific ifqueue in the system at any point
 * in time.
*
 * == Traffic conditioners API
 *
 * The majority of interaction between struct ifqueue and a traffic
 * conditioner occurs via the callbacks a traffic conditioner provides
 * in an instance of struct ifq_ops.
 *
 * XXX document ifqop_*
 *
 * The ifqueue API implements the locking on behalf of the conditioning
 * implementations so conditioners only have to reject or keep mbufs.
 * If something needs to inspect a conditioner's internals, the queue lock
 * needs to be taken to allow for a consistent or safe view. The queue
 * lock may be taken and released with ifq_q_enter() and ifq_q_leave().
 *
 * === ifq_q_enter()
 *
 * Code wishing to access a conditioner's internals may take the queue
 * lock with ifq_q_enter(). The caller must pass a reference to the
 * conditioner's ifq_ops structure so the infrastructure can ensure the
 * caller is able to understand the internals. ifq_q_enter() returns
 * a pointer to the conditioner's internal structures, or NULL if the
 * ifq_ops did not match the current conditioner.
 *
 * === ifq_q_leave()
 *
 * The queue lock acquired with ifq_q_enter() is released with
 * ifq_q_leave().
 *
 * === ifq_mfreem() and ifq_mfreeml()
 *
 * A goal of the API is to avoid freeing an mbuf while mutexes are
 * held. Because the ifq API manages the lock on behalf of the backend
 * ifqops, the backend should not directly free mbufs. If a conditioner
 * backend needs to drop a packet during the handling of ifqop_deq_begin,
 * it may free it by calling ifq_mfreem(). This accounts for the drop,
 * and schedules the free of the mbuf outside the hold of ifq_mtx.
 * ifq_mfreeml() takes an mbuf list as an argument instead.
 *
 *
 * == Network Driver API
 *
 * The API used by network drivers is mostly documented in the
 * ifq_dequeue(9) manpage except for ifq_serialize().
 *
 * === ifq_serialize()
 *
 * A driver may run arbitrary work in the ifqueue serialiser context
 * via ifq_serialize(). The work to be done is represented by a task
 * that has been prepared with task_set().
*
 * The work will be run in series with any other work dispatched by
 * ifq_start(), ifq_restart(), or other ifq_serialize() calls.
 *
 * Because the work may be run on another CPU, the lifetime of the
 * task and the work it represents can extend beyond the end of the
 * call to ifq_serialize() that dispatched it.
 *
 *
 * = ifqueue work serialisation
 *
 * ifqueues provide a mechanism to dispatch work to be run in a single
 * context. Work in this mechanism is represented by task structures.
 *
 * The tasks are run in a context similar to a taskq serviced by a
 * single kernel thread, except the work is run immediately by the
 * first CPU that dispatches work. If a second CPU attempts to dispatch
 * additional tasks while the first is still running, they will be queued
 * to be run by the first CPU. The second CPU will return immediately.
 *
 * = MP Safe Network Drivers
 *
 * An MP safe network driver is one whose start routine can be
 * called by the network stack without holding the big kernel lock.
 *
 * == Attach
 *
 * A driver advertises its ability to run its start routine without
 * the kernel lock by setting the IFXF_MPSAFE flag in ifp->if_xflags
 * before calling if_attach(). Advertising an MPSAFE start routine
 * also implies that the driver understands that a network card can
 * have multiple rings or transmit queues, and therefore provides
 * an if_qstart function (which takes an ifqueue pointer) instead of an
 * if_start function (which takes an ifnet pointer).
 *
 * If the hardware supports multiple transmit rings, it advertises
 * support for multiple rings to the network stack with if_attach_queues()
 * after the call to if_attach(). if_attach_queues() allocates a struct
 * ifqueue for each hardware ring, which can then be initialised by
 * the driver with data for each ring.
 *
 *	void	drv_start(struct ifqueue *);
 *
 *	void
 *	drv_attach()
 *	{
 *	...
*		ifp->if_xflags = IFXF_MPSAFE;
 *		ifp->if_qstart = drv_start;
 *		if_attach(ifp);
 *
 *		if_attach_queues(ifp, DRV_NUM_TX_RINGS);
 *		for (i = 0; i < DRV_NUM_TX_RINGS; i++) {
 *			struct ifqueue *ifq = ifp->if_ifqs[i];
 *			struct drv_tx_ring *ring = &sc->sc_tx_rings[i];
 *
 *			ifq->ifq_softc = ring;
 *			ring->ifq = ifq;
 *		}
 *	}
 *
 * The network stack will then call ifp->if_qstart via ifq_start()
 * to guarantee there is only one instance of that function running
 * for each ifq in the system, and to serialise it with other work
 * the driver may provide.
 *
 * == Initialise
 *
 * When the stack requests an interface be brought up (ie, drv_ioctl()
 * is called to handle SIOCSIFFLAGS with IFF_UP set in ifp->if_flags)
 * drivers should set IFF_RUNNING in ifp->if_flags, and then call
 * ifq_clr_oactive() against each ifq.
 *
 * == if_start
 *
 * ifq_start() checks that IFF_RUNNING is set in ifp->if_flags, that
 * ifq_is_oactive() does not return true, and that there are pending
 * packets to transmit via a call to ifq_len(). Therefore, drivers are
 * no longer responsible for doing this themselves.
 *
 * If a driver should not transmit packets while its link is down, use
 * ifq_purge() to flush pending packets from the transmit queue.
*
 * Drivers for hardware should use the following pattern to transmit
 * packets:
 *
 *	void
 *	drv_start(struct ifqueue *ifq)
 *	{
 *		struct drv_tx_ring *ring = ifq->ifq_softc;
 *		struct ifnet *ifp = ifq->ifq_if;
 *		struct drv_softc *sc = ifp->if_softc;
 *		struct mbuf *m;
 *		int kick = 0;
 *
 *		if (NO_LINK) {
 *			ifq_purge(ifq);
 *			return;
 *		}
 *
 *		for (;;) {
 *			if (NO_SPACE(ring)) {
 *				ifq_set_oactive(ifq);
 *				break;
 *			}
 *
 *			m = ifq_dequeue(ifq);
 *			if (m == NULL)
 *				break;
 *
 *			if (drv_encap(sc, ring, m) != 0) { // map and fill ring
 *				m_freem(m);
 *				continue;
 *			}
 *
 *			bpf_mtap();
 *			kick = 1;
 *		}
 *
 *		if (kick)
 *			drv_kick(ring); // notify hw of new descriptors on the ring
 *	}
 *
 * == Transmission completion
 *
 * The following pattern should be used for transmit queue interrupt
 * processing:
 *
 *	void
 *	drv_txeof(struct drv_tx_ring *ring)
 *	{
 *		struct ifqueue *ifq = ring->ifq;
 *
 *		while (COMPLETED_PKTS(ring)) {
 *			// unmap packets, m_freem() the mbufs.
 *		}
 *
 *		if (ifq_is_oactive(ifq))
 *			ifq_restart(ifq);
 *	}
 *
 * == Stop
 *
 * Bringing an interface down (ie, IFF_UP was cleared in ifp->if_flags)
 * should clear IFF_RUNNING in ifp->if_flags, and guarantee the start
 * routine is not running before freeing any resources it uses:
 *
 *	void
 *	drv_down(struct drv_softc *sc)
 *	{
 *		struct ifnet *ifp = &sc->sc_if;
 *		struct ifqueue *ifq;
 *		int i;
 *
 *		CLR(ifp->if_flags, IFF_RUNNING);
 *		DISABLE_INTERRUPTS();
 *
 *		for (i = 0; i < sc->sc_num_queues; i++) {
 *			ifq = ifp->if_ifqs[i];
 *			ifq_barrier(ifq);
 *		}
 *
 *		intr_barrier(sc->sc_ih);
 *
 *		FREE_RESOURCES();
 *
 *		for (i = 0; i < sc->sc_num_queues; i++) {
 *			ifq = ifp->if_ifqs[i];
 *			ifq_clr_oactive(ifq);
 *		}
 *	}
 *
 */

/*
 * Callbacks a traffic conditioner provides to the ifqueue API.
 *
 * ifqop_idx is the callback ifq_idx() invokes, passing the number of
 * ifqueues and the mbuf being transmitted.  The description above names
 * the handling of ifqop_deq_begin as a point where a conditioner may
 * drop packets with ifq_mfreem().
 *
 * NOTE(review): the remaining members are not documented in this file
 * (see the XXX in the comment above).  Presumably enq/deq_begin/
 * deq_commit/purge back the matching ifq_*() calls and alloc/free
 * manage the conditioner's private state -- confirm against the
 * implementation.
 */
struct ifq_ops {
	unsigned int		 (*ifqop_idx)(unsigned int,
				    const struct mbuf *);
	struct mbuf		*(*ifqop_enq)(struct ifqueue *,
				    struct mbuf *);
	struct mbuf		*(*ifqop_deq_begin)(struct ifqueue *,
				    void **);
	void			 (*ifqop_deq_commit)(struct ifqueue *,
				    struct mbuf *, void *);
	void			 (*ifqop_purge)(struct ifqueue *,
				    struct mbuf_list *);
	void			*(*ifqop_alloc)(unsigned int, void *);
	void			 (*ifqop_free)(unsigned int, void *);
};

/* ops of the priq conditioner, the one ifq_init() configures by default */
extern const struct ifq_ops * const ifq_priq_ops;

/*
 * Interface send queues.
 */

void		 ifq_init(struct ifqueue *, struct ifnet *, unsigned int);
void		 ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
void		 ifq_destroy(struct ifqueue *);
void		 ifq_add_data(struct ifqueue *, struct if_data *);
int		 ifq_enqueue(struct ifqueue *, struct mbuf *);
void		 ifq_start(struct ifqueue *);
struct mbuf	*ifq_deq_begin(struct ifqueue *);
void		 ifq_deq_commit(struct ifqueue *, struct mbuf *);
void		 ifq_deq_rollback(struct ifqueue *, struct mbuf *);
struct mbuf	*ifq_dequeue(struct ifqueue *);
int		 ifq_hdatalen(struct ifqueue *);
void		 ifq_mfreem(struct ifqueue *, struct mbuf *);
void		 ifq_mfreeml(struct ifqueue *, struct mbuf_list *);
unsigned int	 ifq_purge(struct ifqueue *);
void		*ifq_q_enter(struct ifqueue *, const struct ifq_ops *);
void		 ifq_q_leave(struct ifqueue *, void *);
void		 ifq_serialize(struct ifqueue *, struct task *);
void		 ifq_barrier(struct ifqueue *);

int		 ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int,
		     const char *, volatile unsigned int *,
		     volatile unsigned int *);

/* Plain reads/writes of the ifq_len and ifq_maxlen members. */
#define ifq_len(_ifq)			((_ifq)->ifq_len)
#define ifq_empty(_ifq)			(ifq_len(_ifq) == 0)
#define ifq_set_maxlen(_ifq, _l)	((_ifq)->ifq_maxlen = (_l))

/* Non-zero when the queue's current ops pointer is ifq_priq_ops. */
static inline int
ifq_is_priq(struct ifqueue *ifq)
{
	return (ifq->ifq_ops == ifq_priq_ops);
}

/*
 * Set the queue's oactive flag.  The drv_start() example above does
 * this when the transmit ring has no space left.
 */
static inline void
ifq_set_oactive(struct ifqueue *ifq)
{
	ifq->ifq_oactive = 1;
}

/* Clear the queue's oactive flag. */
static inline void
ifq_clr_oactive(struct ifqueue *ifq)
{
	ifq->ifq_oactive = 0;
}

/* Return the current value of the queue's oactive flag. */
static inline unsigned int
ifq_is_oactive(struct ifqueue *ifq)
{
	return (ifq->ifq_oactive);
}

/* Dispatch the queue's own ifq_restart task through ifq_serialize(). */
static inline void
ifq_restart(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_restart);
}

/*
 * Ask the queue's conditioner (its ifqop_idx callback) for the index
 * to use for mbuf m, given nifqs queues.
 */
static inline unsigned int
ifq_idx(struct ifqueue *ifq, unsigned int nifqs, const struct mbuf *m)
{
	return ((*ifq->ifq_ops->ifqop_idx)(nifqs, m));
}

/* ifiq */

void		 ifiq_init(struct ifiqueue *, struct ifnet *, unsigned int);
void		 ifiq_destroy(struct ifiqueue *);
int		 ifiq_input(struct ifiqueue *, struct mbuf_list *);
int		 ifiq_enqueue(struct ifiqueue *, struct mbuf *);
void		 ifiq_add_data(struct ifiqueue *, struct if_data *);

/* Length/emptiness of the ifiq_ml mbuf list, via ml_len()/ml_empty(). */
#define ifiq_len(_ifiq)			ml_len(&(_ifiq)->ifiq_ml)
#define ifiq_empty(_ifiq)		ml_empty(&(_ifiq)->ifiq_ml)

#endif /* _KERNEL */

#endif /* _NET_IFQ_H_ */