summaryrefslogtreecommitdiff
path: root/sys/net/ifq.h
blob: cf53bc702b0657b6814d1a2a66281e817704e590 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
/*	$OpenBSD: ifq.h,v 1.6 2017/01/20 03:48:03 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#ifndef _NET_IFQ_H_
#define _NET_IFQ_H_

struct ifnet;

struct ifq_ops;

/*
 * Interface send queue.
 *
 * Holds the state for packets queued for transmission on an interface:
 * the attached traffic conditioner, statistics counters, and the state
 * used to serialise work such as the driver's start routine.
 */
struct ifqueue {
	/* NOTE(review): presumably the interface passed to ifq_init() */
	struct ifnet		*ifq_if;

	/* mbuf handling */
	/* NOTE(review): presumably the queue lock taken by ifq_q_enter() */
	struct mutex		 ifq_mtx;
	/* callbacks of the traffic conditioner; see ifq_attach() */
	const struct ifq_ops	*ifq_ops;
	/* NOTE(review): presumably the conditioner's private state */
	void			*ifq_q;
	/* value reported by the ifq_len() macro */
	unsigned int		 ifq_len;
	/* written by ifq_set_oactive()/ifq_clr_oactive() */
	unsigned int		 ifq_oactive;

	/* statistics */
	uint64_t		 ifq_packets;
	uint64_t		 ifq_bytes;
	uint64_t		 ifq_qdrops;
	uint64_t		 ifq_errors;
	uint64_t		 ifq_mcasts;

	/* work serialisation */
	/* NOTE(review): presumably these back ifq_serialize(); confirm in ifq.c */
	struct mutex		 ifq_task_mtx;
	struct task_list	 ifq_task_list;
	void			*ifq_serializer;

	/* work to be serialised */
	/* task handed to ifq_serialize() by ifq_start() */
	struct task		 ifq_start;
	/* task handed to ifq_serialize() by ifq_restart() */
	struct task		 ifq_restart;

	/* assigned by the ifq_set_maxlen() macro */
	unsigned int		 ifq_maxlen;
};

#ifdef _KERNEL

#define IFQ_MAXLEN		256

/*
 *
 * Interface Send Queues
 *
 * struct ifqueue sits between the network stack and a driver's
 * transmission of packets. The high level view is that when the stack
 * has finished generating a packet it hands it to a driver for
 * transmission. It does this by queueing the packet on an ifqueue and
 * notifying the driver to start transmission of the queued packets.
 *
 * struct ifqueue also provides the point where conditioning of
 * traffic (ie, priq and hfsc) is implemented, and provides some
 * infrastructure to assist in the implementation of network drivers.
 *
 * = ifq API
 *
 * The ifq API provides functions for three distinct consumers:
 *
 * 1. The network stack
 * 2. Traffic QoS/conditioning implementations
 * 3. Network drivers
 *
 * == Network Stack API
 *
 * The network stack is responsible for initialising and destroying
 * the ifqueue structure, changing the traffic conditioner on an
 * interface queue, enqueuing packets for transmission, and notifying
 * the driver to start transmission.
 *
 * === ifq_init()
 *
 * During if_attach(), the network stack calls ifq_init to initialise
 * the ifqueue structure. By default it configures the priq traffic
 * conditioner.
 *
 * === ifq_destroy()
 *
 * The network stack calls ifq_destroy() during if_detach to tear down
 * the ifqueue structure. It frees the traffic conditioner state, and
 * frees any mbufs that were left queued.
 *
 * === ifq_attach()
 *
 * ifq_attach() is used to replace the current traffic conditioner on
 * the ifqueue. All the pending mbufs are removed from the previous
 * conditioner and requeued on the new.
 *
 * === ifq_enqueue() and ifq_enqueue_try()
 *
 * ifq_enqueue() and ifq_enqueue_try() attempt to fit an mbuf onto the
 * ifqueue. If the current traffic conditioner rejects the packet it
 * won't be queued and will be counted as a drop. ifq_enqueue() will
 * free the mbuf on the caller's behalf if the packet is rejected.
 * ifq_enqueue_try() does not free the mbuf, allowing the caller to
 * reuse it.
 *
 * === ifq_start()
 *
 * Once a packet has been successfully queued with ifq_enqueue() or
 * ifq_enqueue_try(), the network card is notified with a call to
 * if_start(). If an interface is marked with IFXF_MPSAFE in its
 * if_xflags field, if_start() calls ifq_start() to dispatch the
 * interface's start routine. Calls to ifq_start() run in the ifqueue
 * serialisation context, guaranteeing that only one instance of
 * ifp->if_start() will be running in the system at any point in time.
 *
 *
 * == Traffic conditioners API
 *
 * The majority of interaction between struct ifqueue and a traffic
 * conditioner occurs via the callbacks a traffic conditioner provides
 * in an instance of struct ifq_ops.
 *
 * XXX document ifqop_*
 *
 * The ifqueue API implements the locking on behalf of the conditioning
 * implementations so conditioners only have to reject or keep mbufs.
 * If something needs to inspect a conditioner's internals, the queue lock
 * needs to be taken to allow for a consistent or safe view. The queue
 * lock may be taken and released with ifq_q_enter() and ifq_q_leave().
 *
 * === ifq_q_enter()
 *
 * Code wishing to access a conditioner's internals may take the queue
 * lock with ifq_q_enter(). The caller must pass a reference to the
 * conditioner's ifq_ops structure so the infrastructure can ensure the
 * caller is able to understand the internals. ifq_q_enter() returns
 * a pointer to the conditioner's internal structures, or NULL if the
 * ifq_ops did not match the current conditioner.
 *
 * === ifq_q_leave()
 *
 * The queue lock acquired with ifq_q_enter() is released with
 * ifq_q_leave().
 *
 *
 * == Network Driver API
 *
 * The API used by network drivers is mostly documented in the
 * ifq_dequeue(9) manpage except for ifq_serialize(),
 * ifq_is_serialized(), and IFQ_ASSERT_SERIALIZED().
 *
 * === ifq_serialize()
 *
 * A driver may run arbitrary work in the ifqueue serialiser context
 * via ifq_serialize(). The work to be done is represented by a task
 * that has been prepared with task_set().
 *
 * The work will be run in series with any other work dispatched by
 * ifq_start(), ifq_restart(), or other ifq_serialize() calls.
 *
 * Because the work may be run on another CPU, the lifetime of the
 * task and the work it represents can extend beyond the end of the
 * call to ifq_serialize() that dispatched it.
 *
 * === ifq_is_serialized()
 *
 * This function returns whether the caller is currently within the
 * ifqueue serializer context.
 *
 * === IFQ_ASSERT_SERIALIZED()
 *
 * This macro will assert that the caller is currently within the
 * specified ifqueue serialiser context.
 *
 *
 * = ifqueue work serialisation
 *
 * ifqueues provide a mechanism to dispatch work to be run in a single
 * context. Work in this mechanism is represented by task structures.
 *
 * The tasks are run in a context similar to a taskq serviced by a
 * single kernel thread, except the work is run immediately by the
 * first CPU that dispatches work. If a second CPU attempts to dispatch
 * additional tasks while the first is still running, it will be queued
 * to be run by the first CPU. The second CPU will return immediately.
 *
 * = MP Safe Network Drivers
 *
 * An MP safe network driver is one in which its start routine can be
 * called by the network stack without holding the big kernel lock.
 *
 * == Attach
 *
 * A driver advertises its ability to run its start routine without the
 * big kernel lock by setting the IFXF_MPSAFE flag in ifp->if_xflags
 * before calling if_attach():
 *
 * 	ifp->if_xflags = IFXF_MPSAFE;
 * 	ifp->if_start = drv_start;
 * 	if_attach(ifp);
 *
 * The network stack will then wrap its calls to ifp->if_start with
 * ifq_start() to guarantee there is only one instance of that function
 * running in the system and to serialise it with other work the driver
 * may provide.
 *
 * == Initialise
 *
 * When the stack requests an interface be brought up (ie, drv_ioctl()
 * is called to handle SIOCSIFFLAGS with IFF_UP set in ifp->if_flags)
 * drivers should set IFF_RUNNING in ifp->if_flags and call
 * ifq_clr_oactive().
 *
 * == if_start
 *
 * ifq_start() checks that IFF_RUNNING is set in ifp->if_flags, that
 * ifq_is_oactive() does not return true, and that there are pending
 * packets to transmit via a call to ifq_len(). Therefore, drivers are
 * no longer responsible for doing this themselves.
 *
 * If a driver should not transmit packets while its link is down, use
 * ifq_purge() to flush pending packets from the transmit queue.
 *
 * Drivers for hardware should use the following pattern to transmit
 * packets:
 *
 * 	void
 * 	drv_start(struct ifnet *ifp)
 * 	{
 * 		struct drv_softc *sc = ifp->if_softc;
 * 		struct mbuf *m;
 * 		int kick = 0;
 *
 * 		if (NO_LINK) {
 * 			ifq_purge(&ifp->if_snd);
 * 			return;
 * 		}
 *
 * 		for (;;) {
 * 			if (NO_SPACE) {
 * 				ifq_set_oactive(&ifp->if_snd);
 * 				break;
 * 			}
 *
 * 			m = ifq_dequeue(&ifp->if_snd);
 * 			if (m == NULL)
 * 				break;
 *
 * 			if (drv_encap(sc, m) != 0) { // map and fill ring
 * 				m_freem(m);
 * 				continue;
 * 			}
 *
 * 			bpf_mtap();
 * 			kick = 1;
 * 		}
 *
 * 		if (kick)
 * 			drv_kick(sc); // notify hw of new descriptors on the ring
 * 	}
 *
 * == Transmission completion
 *
 * The following pattern should be used for transmit queue interrupt
 * processing:
 *
 * 	void
 * 	drv_txeof(struct drv_softc *sc)
 * 	{
 * 		struct ifnet *ifp = &sc->sc_if;
 *
 * 		while (COMPLETED_PKTS) {
 * 			// unmap packets, m_freem() the mbufs.
 * 		}
 *
 * 		if (ifq_is_oactive(&ifp->if_snd))
 * 			ifq_restart(&ifp->if_snd);
 * 	}
 *
 * == Stop
 *
 * Bringing an interface down (ie, IFF_UP was cleared in ifp->if_flags)
 * should clear IFF_RUNNING in ifp->if_flags, and guarantee the start
 * routine is not running before freeing any resources it uses:
 *
 * 	void
 * 	drv_down(struct drv_softc *sc)
 * 	{
 * 		struct ifnet *ifp = &sc->sc_if;
 *
 * 		CLR(ifp->if_flags, IFF_RUNNING);
 * 		DISABLE_INTERRUPTS();
 *
 * 		ifq_barrier(&ifp->if_snd);
 * 		intr_barrier(sc->sc_ih);
 *
 * 		FREE_RESOURCES();
 *
 * 		ifq_clr_oactive(&ifp->if_snd);
 * 	}
 *
 */

/*
 * Callbacks a traffic conditioner provides to an ifqueue.
 *
 * The ifqueue API does the locking on behalf of the conditioner, so
 * these callbacks only have to reject or keep mbufs.
 *
 * NOTE(review): the per-member notes below are inferred from the names
 * and signatures only; confirm against the priq implementation.
 */
struct ifq_ops {
	/* presumably allocates the conditioner's private state */
	void			*(*ifqop_alloc)(void *);
	/* presumably releases the state returned by ifqop_alloc */
	void			 (*ifqop_free)(void *);
	/* offers an mbuf to the conditioner */
	int			 (*ifqop_enq)(struct ifqueue *, struct mbuf *);
	/* presumably backs ifq_deq_begin(); the void ** looks like a cookie */
	struct mbuf 		*(*ifqop_deq_begin)(struct ifqueue *, void **);
	/* presumably backs ifq_deq_commit(), taking the cookie from above */
	void			 (*ifqop_deq_commit)(struct ifqueue *,
				    struct mbuf *, void *);
	/* presumably moves all queued mbufs onto the given mbuf_list */
	void	 		 (*ifqop_purge)(struct ifqueue *,
				    struct mbuf_list *);
};

/*
 * Interface send queues.
 *
 * The prototypes are grouped below by the consumer described in the
 * API overview comment earlier in this file.
 */

/* Network stack: set up, change conditioner, tear down, enqueue. */
void		 ifq_init(struct ifqueue *, struct ifnet *);
void		 ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
void		 ifq_destroy(struct ifqueue *);
/* ifq_enqueue_try() leaves a rejected mbuf to the caller; ifq_enqueue() frees it. */
int		 ifq_enqueue_try(struct ifqueue *, struct mbuf *);
int		 ifq_enqueue(struct ifqueue *, struct mbuf *);

/* Network drivers: dequeue interface, see ifq_dequeue(9). */
struct mbuf	*ifq_deq_begin(struct ifqueue *);
void		 ifq_deq_commit(struct ifqueue *, struct mbuf *);
void		 ifq_deq_rollback(struct ifqueue *, struct mbuf *);
struct mbuf	*ifq_dequeue(struct ifqueue *);
/* Flushes pending packets; NOTE(review): return value presumably a count. */
unsigned int	 ifq_purge(struct ifqueue *);

/* Traffic conditioners: take and release the queue lock. */
void		*ifq_q_enter(struct ifqueue *, const struct ifq_ops *);
void		 ifq_q_leave(struct ifqueue *, void *);

/* Work serialisation. */
void		 ifq_serialize(struct ifqueue *, struct task *);
int		 ifq_is_serialized(struct ifqueue *);
void		 ifq_barrier(struct ifqueue *);

/* Number of packets recorded in the queue's ifq_len field. */
#define	ifq_len(_ifq)			((_ifq)->ifq_len)
/* Non-zero when ifq_len() reports no queued packets. */
#define	ifq_empty(_ifq)			(ifq_len(_ifq) == 0)
/* Store _l in the queue's ifq_maxlen field; evaluates to the stored value. */
#define	ifq_set_maxlen(_ifq, _l)	((_ifq)->ifq_maxlen = (_l))

/*
 * Flag the queue as output-active.
 *
 * The drv_start() pattern documented in this file does this when the
 * hardware has no space for more packets.
 */
static inline void
ifq_set_oactive(struct ifqueue *ifq)
{
	const unsigned int active = 1;

	ifq->ifq_oactive = active;
}

/*
 * Clear the queue's output-active flag.
 *
 * Drivers are expected to call this when the interface is brought up
 * and after it has been stopped (see the Initialise and Stop notes in
 * this file).
 */
static inline void
ifq_clr_oactive(struct ifqueue *ifq)
{
	const unsigned int idle = 0;

	ifq->ifq_oactive = idle;
}

/*
 * Report the queue's output-active flag.
 *
 * Returns the current value of ifq_oactive: non-zero after
 * ifq_set_oactive(), zero after ifq_clr_oactive().
 */
static inline unsigned int
ifq_is_oactive(struct ifqueue *ifq)
{
	unsigned int oactive = ifq->ifq_oactive;

	return oactive;
}

/*
 * Dispatch the queue's start task through ifq_serialize(), so it runs
 * in the ifqueue serialisation context.
 */
static inline void
ifq_start(struct ifqueue *ifq)
{
	struct task *start_task = &ifq->ifq_start;

	ifq_serialize(ifq, start_task);
}

/*
 * Dispatch the queue's restart task through ifq_serialize().
 *
 * The drv_txeof() pattern documented in this file calls this once
 * transmissions have completed and the queue is still marked
 * output-active.
 */
static inline void
ifq_restart(struct ifqueue *ifq)
{
	struct task *restart_task = &ifq->ifq_restart;

	ifq_serialize(ifq, restart_task);
}

/* Assert (via KASSERT) that the caller is in _ifq's serialiser context. */
#define IFQ_ASSERT_SERIALIZED(_ifq)	KASSERT(ifq_is_serialized(_ifq))

/*
 * Callback table for the priq traffic conditioner.
 * NOTE(review): presumably what ifq_init() installs by default; confirm.
 */
extern const struct ifq_ops * const ifq_priq_ops;

#endif /* _KERNEL */

#endif /* _NET_IFQ_H_ */