summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcheloha <cheloha@cvs.openbsd.org>2020-12-26 16:30:59 +0000
committercheloha <cheloha@cvs.openbsd.org>2020-12-26 16:30:59 +0000
commit893ef53ee7763c2392fa5cd0397f94ad645f1c99 (patch)
treeabc56e32785a65ba4cd25f8418267f16b3198a74
parent897d32aa526b00bf3d7f3e700b50c7c94a612e53 (diff)
bpf(4): bpf_d struct: replace bd_rdStart member with bd_nreaders member
bd_rdStart is strange. It nominally represents the start of a read(2) on a given bpf(4) descriptor, but there are several problems with it: 1. If there are multiple readers, the bd_rdStart is not set by subsequent readers, so their timeout is screwed up. The read timeout should really be tracked on a per-thread basis in bpfread(). 2. We set bd_rdStart for poll(2), select(2), and kevent(2), even though that makes no sense. We should not be setting bd_rdStart in bpfpoll() or bpfkqfilter(). 3. bd_rdStart is buggy. If ticks is 0 when the read starts then bpf_catchpacket() won't wake up the reader. This is a problem inherent to the design of bd_rdStart: it serves as both a boolean and a scalar value, even though 0 is a valid value in the scalar range. So let's replace it with a better struct member. "bd_nreaders" is a count of threads sleeping in bpfread(). It is incremented before a thread goes to sleep in bpfread() and decremented when a thread wakes up. If bd_nreaders is greater than zero when we reach bpf_catchpacket() and fbuf is non-NULL we wake up all readers. The read timeout, if any, is now tracked locally by the thread in bpfread(). Unlike bd_rdStart, bpfpoll() and bpfkqfilter() don't touch bd_nreaders. Prompted by mpi@. Basic idea from dlg@. Lots of input from dlg@. Tested by dlg@ with tcpdump(8) (blocking read) and flow-collector (https://github.com/eait-itig/flow-collector, non-blocking read). ok dlg@
-rw-r--r--sys/net/bpf.c56
-rw-r--r--sys/net/bpfdesc.h4
2 files changed, 25 insertions, 35 deletions
diff --git a/sys/net/bpf.c b/sys/net/bpf.c
index 742a8570f13..1ac05458f60 100644
--- a/sys/net/bpf.c
+++ b/sys/net/bpf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: bpf.c,v 1.198 2020/12/25 12:59:53 visa Exp $ */
+/* $OpenBSD: bpf.c,v 1.199 2020/12/26 16:30:58 cheloha Exp $ */
/* $NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $ */
/*
@@ -430,7 +430,7 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
{
struct bpf_d *d;
caddr_t hbuf;
- int hlen, error;
+ int end, error, hlen, nticks;
KERNEL_ASSERT_LOCKED();
@@ -451,13 +451,10 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
}
/*
- * If there's a timeout, bd_rdStart is tagged when we start the read.
- * we can then figure out when we're done reading.
+ * If there's a timeout, mark when the read should end.
*/
- if (d->bd_rnonblock == 0 && d->bd_rdStart == 0)
- d->bd_rdStart = ticks;
- else
- d->bd_rdStart = 0;
+ if (d->bd_rtout)
+ end = ticks + (int)d->bd_rtout;
/*
* If the hold buffer is empty, then do a timed sleep, which
@@ -486,13 +483,21 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
if (d->bd_rnonblock) {
/* User requested non-blocking I/O */
error = EWOULDBLOCK;
+ } else if (d->bd_rtout == 0) {
+ /* No read timeout set. */
+ d->bd_nreaders++;
+ error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
+ "bpf", INFSLP);
+ d->bd_nreaders--;
+ } else if ((nticks = end - ticks) > 0) {
+ /* Read timeout has not expired yet. */
+ d->bd_nreaders++;
+ error = msleep(d, &d->bd_mtx, PRINET|PCATCH, "bpf",
+ nticks);
+ d->bd_nreaders--;
} else {
- if (d->bd_rdStart <= ULONG_MAX - d->bd_rtout &&
- d->bd_rdStart + d->bd_rtout < ticks) {
- error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
- "bpf", d->bd_rtout);
- } else
- error = EWOULDBLOCK;
+ /* Read timeout has expired. */
+ error = EWOULDBLOCK;
}
if (error == EINTR || error == ERESTART)
goto out;
@@ -1153,15 +1158,8 @@ bpfpoll(dev_t dev, int events, struct proc *p)
mtx_enter(&d->bd_mtx);
if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
revents |= events & (POLLIN | POLLRDNORM);
- else {
- /*
- * if there's a timeout, mark the time we
- * started waiting.
- */
- if (d->bd_rnonblock == 0 && d->bd_rdStart == 0)
- d->bd_rdStart = ticks;
+ else
selrecord(p, &d->bd_sel);
- }
mtx_leave(&d->bd_mtx);
}
return (revents);
@@ -1197,11 +1195,6 @@ bpfkqfilter(dev_t dev, struct knote *kn)
kn->kn_hook = d;
klist_insert_locked(klist, kn);
- mtx_enter(&d->bd_mtx);
- if (d->bd_rnonblock == 0 && d->bd_rdStart == 0)
- d->bd_rdStart = ticks;
- mtx_leave(&d->bd_mtx);
-
return (0);
}
@@ -1551,15 +1544,12 @@ bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
do_wakeup = 1;
}
- if (d->bd_rdStart && d->bd_rdStart <= ULONG_MAX - d->bd_rtout &&
- d->bd_rdStart + d->bd_rtout < ticks) {
+ if (d->bd_nreaders > 0) {
/*
- * we could be selecting on the bpf, and we
- * may have timeouts set. We got here by getting
- * a packet, so wake up the reader.
+ * We have one or more threads sleeping in bpfread().
+ * We got a packet, so wake up all readers.
*/
if (d->bd_fbuf != NULL) {
- d->bd_rdStart = 0;
ROTATE_BUFFERS(d);
do_wakeup = 1;
}
diff --git a/sys/net/bpfdesc.h b/sys/net/bpfdesc.h
index 62c15c598fb..065db296ecf 100644
--- a/sys/net/bpfdesc.h
+++ b/sys/net/bpfdesc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: bpfdesc.h,v 1.42 2020/12/11 05:00:21 cheloha Exp $ */
+/* $OpenBSD: bpfdesc.h,v 1.43 2020/12/26 16:30:58 cheloha Exp $ */
/* $NetBSD: bpfdesc.h,v 1.11 1995/09/27 18:30:42 thorpej Exp $ */
/*
@@ -79,7 +79,7 @@ struct bpf_d {
struct bpf_if *bd_bif; /* interface descriptor */
u_long bd_rtout; /* [m] Read timeout in 'ticks' */
- u_long bd_rdStart; /* when the read started */
+ u_long bd_nreaders; /* [m] # threads asleep in bpfread() */
int bd_rnonblock; /* true if nonblocking reads are set */
struct bpf_program_smr
*bd_rfilter; /* read filter code */