diff options
author | cheloha <cheloha@cvs.openbsd.org> | 2020-12-26 16:30:59 +0000 |
---|---|---|
committer | cheloha <cheloha@cvs.openbsd.org> | 2020-12-26 16:30:59 +0000 |
commit | 893ef53ee7763c2392fa5cd0397f94ad645f1c99 (patch) | |
tree | abc56e32785a65ba4cd25f8418267f16b3198a74 | |
parent | 897d32aa526b00bf3d7f3e700b50c7c94a612e53 (diff) |
bpf(4): bpf_d struct: replace bd_rdStart member with bd_nreaders member
bd_rdStart is strange. It nominally represents the start of a read(2)
on a given bpf(4) descriptor, but there are several problems with it:
1. If there are multiple readers, the bd_rdStart is not set by subsequent
readers, so their timeout is screwed up. The read timeout should really
be tracked on a per-thread basis in bpfread().
2. We set bd_rdStart for poll(2), select(2), and kevent(2), even though
that makes no sense. We should not be setting bd_rdStart in bpfpoll()
or bpfkqfilter().
3. bd_rdStart is buggy. If ticks is 0 when the read starts then
bpf_catchpacket() won't wake up the reader. This is a problem
inherent to the design of bd_rdStart: it serves as both a boolean
and a scalar value, even though 0 is a valid value in the scalar
range.
So let's replace it with a better struct member. "bd_nreaders" is a
count of threads sleeping in bpfread(). It is incremented before a
thread goes to sleep in bpfread() and decremented when a thread wakes
up. If bd_nreaders is greater than zero when we reach bpf_catchpacket()
and fbuf is non-NULL we wake up all readers.
The read timeout, if any, is now tracked locally by the thread in
bpfread().
Unlike bd_rdStart, bpfpoll() and bpfkqfilter() don't touch
bd_nreaders.
Prompted by mpi@. Basic idea from dlg@. Lots of input from dlg@.
Tested by dlg@ with tcpdump(8) (blocking read) and flow-collector
(https://github.com/eait-itig/flow-collector, non-blocking read).
ok dlg@
-rw-r--r-- | sys/net/bpf.c | 56 | ||||
-rw-r--r-- | sys/net/bpfdesc.h | 4 |
2 files changed, 25 insertions, 35 deletions
diff --git a/sys/net/bpf.c b/sys/net/bpf.c index 742a8570f13..1ac05458f60 100644 --- a/sys/net/bpf.c +++ b/sys/net/bpf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: bpf.c,v 1.198 2020/12/25 12:59:53 visa Exp $ */ +/* $OpenBSD: bpf.c,v 1.199 2020/12/26 16:30:58 cheloha Exp $ */ /* $NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $ */ /* @@ -430,7 +430,7 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) { struct bpf_d *d; caddr_t hbuf; - int hlen, error; + int end, error, hlen, nticks; KERNEL_ASSERT_LOCKED(); @@ -451,13 +451,10 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) } /* - * If there's a timeout, bd_rdStart is tagged when we start the read. - * we can then figure out when we're done reading. + * If there's a timeout, mark when the read should end. */ - if (d->bd_rnonblock == 0 && d->bd_rdStart == 0) - d->bd_rdStart = ticks; - else - d->bd_rdStart = 0; + if (d->bd_rtout) + end = ticks + (int)d->bd_rtout; /* * If the hold buffer is empty, then do a timed sleep, which @@ -486,13 +483,21 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) if (d->bd_rnonblock) { /* User requested non-blocking I/O */ error = EWOULDBLOCK; + } else if (d->bd_rtout == 0) { + /* No read timeout set. */ + d->bd_nreaders++; + error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH, + "bpf", INFSLP); + d->bd_nreaders--; + } else if ((nticks = end - ticks) > 0) { + /* Read timeout has not expired yet. */ + d->bd_nreaders++; + error = msleep(d, &d->bd_mtx, PRINET|PCATCH, "bpf", + nticks); + d->bd_nreaders--; } else { - if (d->bd_rdStart <= ULONG_MAX - d->bd_rtout && - d->bd_rdStart + d->bd_rtout < ticks) { - error = msleep(d, &d->bd_mtx, PRINET|PCATCH, - "bpf", d->bd_rtout); - } else - error = EWOULDBLOCK; + /* Read timeout has expired. */ + error = EWOULDBLOCK; } if (error == EINTR || error == ERESTART) goto out; @@ -1153,15 +1158,8 @@ bpfpoll(dev_t dev, int events, struct proc *p) mtx_enter(&d->bd_mtx); if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0)) revents |= events & (POLLIN | POLLRDNORM); - else { - /* - * if there's a timeout, mark the time we - * started waiting. - */ - if (d->bd_rnonblock == 0 && d->bd_rdStart == 0) - d->bd_rdStart = ticks; + else selrecord(p, &d->bd_sel); - } mtx_leave(&d->bd_mtx); } return (revents); @@ -1197,11 +1195,6 @@ bpfkqfilter(dev_t dev, struct knote *kn) kn->kn_hook = d; klist_insert_locked(klist, kn); - mtx_enter(&d->bd_mtx); - if (d->bd_rnonblock == 0 && d->bd_rdStart == 0) - d->bd_rdStart = ticks; - mtx_leave(&d->bd_mtx); - return (0); } @@ -1551,15 +1544,12 @@ bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen, do_wakeup = 1; } - if (d->bd_rdStart && d->bd_rdStart <= ULONG_MAX - d->bd_rtout && - d->bd_rdStart + d->bd_rtout < ticks) { + if (d->bd_nreaders > 0) { /* - * we could be selecting on the bpf, and we - * may have timeouts set. We got here by getting - * a packet, so wake up the reader. + * We have one or more threads sleeping in bpfread(). + * We got a packet, so wake up all readers. */ if (d->bd_fbuf != NULL) { - d->bd_rdStart = 0; ROTATE_BUFFERS(d); do_wakeup = 1; } diff --git a/sys/net/bpfdesc.h b/sys/net/bpfdesc.h index 62c15c598fb..065db296ecf 100644 --- a/sys/net/bpfdesc.h +++ b/sys/net/bpfdesc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: bpfdesc.h,v 1.42 2020/12/11 05:00:21 cheloha Exp $ */ +/* $OpenBSD: bpfdesc.h,v 1.43 2020/12/26 16:30:58 cheloha Exp $ */ /* $NetBSD: bpfdesc.h,v 1.11 1995/09/27 18:30:42 thorpej Exp $ */ /* @@ -79,7 +79,7 @@ struct bpf_d { struct bpf_if *bd_bif; /* interface descriptor */ u_long bd_rtout; /* [m] Read timeout in 'ticks' */ - u_long bd_rdStart; /* when the read started */ + u_long bd_nreaders; /* [m] # threads asleep in bpfread() */ int bd_rnonblock; /* true if nonblocking reads are set */ struct bpf_program_smr *bd_rfilter; /* read filter code */ |