summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sys/kern/kern_event.c70
-rw-r--r--sys/kern/sys_generic.c49
-rw-r--r--sys/sys/event.h6
3 files changed, 73 insertions, 52 deletions
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index ab663547142..f33373fbaf7 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_event.c,v 1.170 2021/11/06 05:48:47 visa Exp $ */
+/* $OpenBSD: kern_event.c,v 1.171 2021/11/12 04:34:22 visa Exp $ */
/*-
* Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
@@ -73,6 +73,7 @@ void kqueue_terminate(struct proc *p, struct kqueue *);
void KQREF(struct kqueue *);
void KQRELE(struct kqueue *);
+void kqueue_purge(struct proc *, struct kqueue *);
int kqueue_sleep(struct kqueue *, struct timespec *);
int kqueue_read(struct file *, struct uio *, int);
@@ -763,29 +764,55 @@ filter_process(struct knote *kn, struct kevent *kev)
return (active);
}
+/*
+ * Initialize the current thread for poll/select system call.
+ * num indicates the number of serials that the system call may utilize.
+ * After this function, the valid range of serials is
+ * p_kq_serial <= x < p_kq_serial + num.
+ */
void
-kqpoll_init(void)
+kqpoll_init(unsigned int num)
{
struct proc *p = curproc;
struct filedesc *fdp;
- if (p->p_kq != NULL) {
- /*
- * Discard any badfd knotes that have been enqueued after
- * previous scan.
- * This prevents them from accumulating in case
- * scan does not make progress for some reason.
- */
- kqpoll_dequeue(p, 0);
- return;
+ if (p->p_kq == NULL) {
+ p->p_kq = kqueue_alloc(p->p_fd);
+ p->p_kq_serial = arc4random();
+ fdp = p->p_fd;
+ fdplock(fdp);
+ LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next);
+ fdpunlock(fdp);
}
- p->p_kq = kqueue_alloc(p->p_fd);
- p->p_kq_serial = arc4random();
- fdp = p->p_fd;
- fdplock(fdp);
- LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next);
- fdpunlock(fdp);
+ if (p->p_kq_serial + num < p->p_kq_serial) {
+ /* Serial is about to wrap. Clear all attached knotes. */
+ kqueue_purge(p, p->p_kq);
+ p->p_kq_serial = 0;
+ }
+
+ /*
+ * Discard any detached knotes that have been enqueued after
+ * previous scan.
+ * This prevents them from accumulating in case
+ * scan does not make progress for some reason.
+ */
+ kqpoll_dequeue(p, 0);
+}
+
+/*
+ * Finish poll/select system call.
+ * num must have the same value that was used with kqpoll_init().
+ */
+void
+kqpoll_done(unsigned int num)
+{
+ struct proc *p = curproc;
+
+ KASSERT(p->p_kq != NULL);
+ KASSERT(p->p_kq_serial + num >= p->p_kq_serial);
+
+ p->p_kq_serial += num;
}
void
@@ -1383,6 +1410,15 @@ retry:
mtx_leave(&kq->kq_lock);
+ /* Drop expired kqpoll knotes. */
+ if (p->p_kq == kq &&
+ p->p_kq_serial > (unsigned long)kn->kn_udata) {
+ filter_detach(kn);
+ knote_drop(kn, p);
+ mtx_enter(&kq->kq_lock);
+ continue;
+ }
+
memset(kevp, 0, sizeof(*kevp));
if (filter_process(kn, kevp) == 0) {
mtx_enter(&kq->kq_lock);
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index 9a10bcc0057..0af21d981fa 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: sys_generic.c,v 1.139 2021/10/29 15:52:44 anton Exp $ */
+/* $OpenBSD: sys_generic.c,v 1.140 2021/11/12 04:34:22 visa Exp $ */
/* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */
/*
@@ -633,7 +633,7 @@ dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex,
pobits[2] = (fd_set *)&bits[5];
}
- kqpoll_init();
+ kqpoll_init(nd);
#define getbits(name, x) \
if (name && (error = copyin(name, pibits[x], ni))) \
@@ -730,8 +730,7 @@ done:
if (pibits[0] != (fd_set *)&bits[0])
free(pibits[0], M_TEMP, 6 * ni);
- kqueue_purge(p, p->p_kq);
- p->p_kq_serial += nd;
+ kqpoll_done(nd);
return (error);
}
@@ -759,7 +758,7 @@ pselregister(struct proc *p, fd_set *pibits[3], fd_set *pobits[3], int nfd,
DPRINTFN(2, "select fd %d mask %d serial %lu\n",
fd, msk, p->p_kq_serial);
EV_SET(&kev, fd, evf[msk],
- EV_ADD|EV_ENABLE|EV_ONESHOT|__EV_POLL,
+ EV_ADD|EV_ENABLE|__EV_POLL,
evff[msk], 0, (void *)(p->p_kq_serial));
#ifdef KTRACE
if (KTRPOINT(p, KTR_STRUCT))
@@ -804,13 +803,10 @@ int
pselcollect(struct proc *p, struct kevent *kevp, fd_set *pobits[3],
int *ncollected)
{
- /* Filter out and lazily delete spurious events */
if ((unsigned long)kevp->udata != p->p_kq_serial) {
- DPRINTFN(0, "select fd %u mismatched serial %lu\n",
- (int)kevp->ident, p->p_kq_serial);
- kevp->flags = EV_DISABLE|EV_DELETE;
- kqueue_register(p->p_kq, kevp, p);
- return (0);
+ panic("%s: spurious kevp %p fd %d udata 0x%lx serial 0x%lx",
+ __func__, kevp, (int)kevp->ident,
+ (unsigned long)kevp->udata, p->p_kq_serial);
}
if (kevp->flags & EV_ERROR) {
@@ -1001,14 +997,14 @@ ppollregister(struct proc *p, struct pollfd *pl, int nfds, int *nregistered)
kevp = kev;
if (pl[i].events & (POLLIN | POLLRDNORM)) {
EV_SET(kevp, pl[i].fd, EVFILT_READ,
- EV_ADD|EV_ENABLE|EV_ONESHOT|__EV_POLL, 0, 0,
+ EV_ADD|EV_ENABLE|__EV_POLL, 0, 0,
(void *)(p->p_kq_serial + i));
nkev++;
kevp++;
}
if (pl[i].events & (POLLOUT | POLLWRNORM)) {
EV_SET(kevp, pl[i].fd, EVFILT_WRITE,
- EV_ADD|EV_ENABLE|EV_ONESHOT|__EV_POLL, 0, 0,
+ EV_ADD|EV_ENABLE|__EV_POLL, 0, 0,
(void *)(p->p_kq_serial + i));
nkev++;
kevp++;
@@ -1017,7 +1013,7 @@ ppollregister(struct proc *p, struct pollfd *pl, int nfds, int *nregistered)
int evff = forcehup ? 0 : NOTE_OOB;
EV_SET(kevp, pl[i].fd, EVFILT_EXCEPT,
- EV_ADD|EV_ENABLE|EV_ONESHOT|__EV_POLL, evff, 0,
+ EV_ADD|EV_ENABLE|__EV_POLL, evff, 0,
(void *)(p->p_kq_serial + i));
nkev++;
kevp++;
@@ -1143,7 +1139,7 @@ doppoll(struct proc *p, struct pollfd *fds, u_int nfds,
return (EINVAL);
}
- kqpoll_init();
+ kqpoll_init(nfds);
sz = nfds * sizeof(*pl);
@@ -1230,8 +1226,7 @@ bad:
if (pl != pfds)
free(pl, M_TEMP, sz);
- kqueue_purge(p, p->p_kq);
- p->p_kq_serial += nfds;
+ kqpoll_done(nfds);
return (error);
}
@@ -1248,25 +1243,15 @@ ppollcollect(struct proc *p, struct kevent *kevp, struct pollfd *pl, u_int nfds)
/* Extract poll array index */
i = (unsigned long)kevp->udata - p->p_kq_serial;
- /*
- * Lazily delete spurious events.
- *
- * This should not happen as long as kqueue_purge() is called
- * at the end of every syscall. It migh be interesting to do
- * like DragonFlyBSD and not always allocated a new knote in
- * kqueue_register() with that lazy removal makes sense.
- */
if (i >= nfds) {
- DPRINTFN(0, "poll get out of range udata %lu vs serial %lu\n",
+ panic("%s: spurious kevp %p nfds %u udata 0x%lx serial 0x%lx",
+ __func__, kevp, nfds,
(unsigned long)kevp->udata, p->p_kq_serial);
- kevp->flags = EV_DISABLE|EV_DELETE;
- kqueue_register(p->p_kq, kevp, p);
- return (0);
}
if ((int)kevp->ident != pl[i].fd) {
- DPRINTFN(0, "poll get %lu/%d mismatch fd %u!=%d serial %lu\n",
- i+1, nfds, (int)kevp->ident, pl[i].fd, p->p_kq_serial);
- return (0);
+ panic("%s: kevp %p %lu/%d mismatch fd %d!=%d serial 0x%lx",
+ __func__, kevp, i + 1, nfds, (int)kevp->ident, pl[i].fd,
+ p->p_kq_serial);
}
/*
diff --git a/sys/sys/event.h b/sys/sys/event.h
index fe4b9f1560e..c67eb800bf7 100644
--- a/sys/sys/event.h
+++ b/sys/sys/event.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: event.h,v 1.57 2021/10/24 07:02:47 visa Exp $ */
+/* $OpenBSD: event.h,v 1.58 2021/11/12 04:34:23 visa Exp $ */
/*-
* Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
@@ -287,7 +287,8 @@ extern const struct filterops sig_filtops;
extern const struct filterops dead_filtops;
extern const struct klistops socket_klistops;
-extern void kqpoll_init(void);
+extern void kqpoll_init(unsigned int);
+extern void kqpoll_done(unsigned int);
extern void kqpoll_exit(void);
extern void knote(struct klist *list, long hint);
extern void knote_fdclose(struct proc *p, int fd);
@@ -302,7 +303,6 @@ extern int kqueue_scan(struct kqueue_scan_state *, int, struct kevent *,
struct timespec *, struct proc *, int *);
extern void kqueue_scan_setup(struct kqueue_scan_state *, struct kqueue *);
extern void kqueue_scan_finish(struct kqueue_scan_state *);
-extern void kqueue_purge(struct proc *, struct kqueue *);
extern int filt_seltrue(struct knote *kn, long hint);
extern int seltrue_kqfilter(dev_t, struct knote *);
extern void klist_init(struct klist *, const struct klistops *, void *);