diff options
author | Visa Hankala <visa@cvs.openbsd.org> | 2019-12-12 16:31:07 +0000 |
---|---|---|
committer | Visa Hankala <visa@cvs.openbsd.org> | 2019-12-12 16:31:07 +0000 |
commit | 343719e171076553d8e73c43636ee37e6c68b91c (patch) | |
tree | aa937ffb8025a39e1b0aa116392a6b3d9a01d548 /sys | |
parent | ec21f979bb84e447b9e25fc6bb627ee58c10af9f (diff) |
Allow sleeping inside kqueue event filters.
In kqueue_scan(), threads have to get an exclusive access to a knote
before processing by calling knote_acquire(). This prevents the knote
from being destroyed while it is still in use. knote_acquire() also
blocks other threads from processing the knote. Once knote processing
has finished, the thread has to call knote_release().
The kqueue subsystem is still serialized by the kernel lock. If an event
filter sleeps, the kernel lock is released and another thread might
enter kqueue_scan(). kqueue_scan() uses start and end markers to keep
track of the scan's progress and it has to be aware of other threads'
markers.
This patch is a revised version of mpi@'s work derived
from DragonFly BSD. kqueue_check() has been adapted from NetBSD.
Tested by anton@, sashan@
OK mpi@, anton@, sashan@
Diffstat (limited to 'sys')
-rw-r--r-- | sys/kern/kern_event.c | 160 |
-rw-r--r-- | sys/sys/event.h | 6 |
2 files changed, 148 insertions, 18 deletions
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index d8378339814..deaf8366ab5 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_event.c,v 1.108 2019/12/11 07:30:09 guenther Exp $ */ +/* $OpenBSD: kern_event.c,v 1.109 2019/12/12 16:31:06 visa Exp $ */ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> @@ -83,6 +83,8 @@ void knote_attach(struct knote *kn); void knote_drop(struct knote *kn, struct proc *p); void knote_enqueue(struct knote *kn); void knote_dequeue(struct knote *kn); +int knote_acquire(struct knote *kn); +void knote_release(struct knote *kn); #define knote_alloc() ((struct knote *)pool_get(&knote_pool, PR_WAITOK)) #define knote_free(kn) pool_put(&knote_pool, (kn)) @@ -556,6 +558,48 @@ sys_kevent(struct proc *p, void *v, register_t *retval) return (error); } +#ifdef KQUEUE_DEBUG +void +kqueue_do_check(struct kqueue *kq, const char *func, int line) +{ + struct knote *kn; + int count = 0, nmarker = 0; + + KERNEL_ASSERT_LOCKED(); + splassert(IPL_HIGH); + + TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) { + if (kn->kn_filter == EVFILT_MARKER) { + if ((kn->kn_status & KN_QUEUED) != 0) + panic("%s:%d: kq=%p kn=%p marker QUEUED", + func, line, kq, kn); + nmarker++; + } else { + if ((kn->kn_status & KN_ACTIVE) == 0) + panic("%s:%d: kq=%p kn=%p knote !ACTIVE", + func, line, kq, kn); + if ((kn->kn_status & KN_QUEUED) == 0) + panic("%s:%d: kq=%p kn=%p knote !QUEUED", + func, line, kq, kn); + if (kn->kn_kq != kq) + panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq", + func, line, kq, kn, kn->kn_kq); + count++; + if (count > kq->kq_count) + goto bad; + } + } + if (count != kq->kq_count) { +bad: + panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d", + func, line, kq, kq->kq_count, count, nmarker); + } +} +#define kqueue_check(kq) kqueue_do_check((kq), __func__, __LINE__) +#else +#define kqueue_check(kq) do {} while (0) +#endif + int kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) { 
@@ -696,7 +740,7 @@ kqueue_scan(struct kqueue *kq, int maxevents, struct kevent *ulistp, { struct kevent *kevp; struct timespec elapsed, start, stop; - struct knote *kn, marker; + struct knote mend, mstart, *kn; int s, count, timeout, nkev = 0, error = 0; struct kevent kev[KQ_NEVENTS]; @@ -709,6 +753,9 @@ kqueue_scan(struct kqueue *kq, int maxevents, struct kevent *ulistp, goto done; } + memset(&mstart, 0, sizeof(mstart)); + memset(&mend, 0, sizeof(mend)); + retry: if (kq->kq_state & KQ_DYING) { error = EBADF; @@ -744,34 +791,55 @@ retry: goto done; } - TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe); + mstart.kn_filter = EVFILT_MARKER; + mstart.kn_status = KN_PROCESSING; + TAILQ_INSERT_HEAD(&kq->kq_head, &mstart, kn_tqe); + mend.kn_filter = EVFILT_MARKER; + mend.kn_status = KN_PROCESSING; + TAILQ_INSERT_TAIL(&kq->kq_head, &mend, kn_tqe); while (count) { - kn = TAILQ_FIRST(&kq->kq_head); - if (kn == &marker) { - TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); - splx(s); - if (count == maxevents) - goto retry; - goto done; + kn = TAILQ_NEXT(&mstart, kn_tqe); + if (kn->kn_filter == EVFILT_MARKER) { + if (kn == &mend) { + TAILQ_REMOVE(&kq->kq_head, &mend, kn_tqe); + TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe); + splx(s); + if (count == maxevents) + goto retry; + goto done; + } + + /* Move start marker past another thread's marker. 
*/ + TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe); + TAILQ_INSERT_AFTER(&kq->kq_head, kn, &mstart, kn_tqe); + continue; } + if (!knote_acquire(kn)) + continue; + + kqueue_check(kq); TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); + kn->kn_status &= ~KN_QUEUED; kq->kq_count--; + kqueue_check(kq); if (kn->kn_status & KN_DISABLED) { - kn->kn_status &= ~KN_QUEUED; + knote_release(kn); continue; } if ((kn->kn_flags & EV_ONESHOT) == 0 && kn->kn_fop->f_event(kn, 0) == 0) { - kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); + if ((kn->kn_status & KN_QUEUED) == 0) + kn->kn_status &= ~KN_ACTIVE; + knote_release(kn); + kqueue_check(kq); continue; } *kevp = kn->kn_kevent; kevp++; nkev++; if (kn->kn_flags & EV_ONESHOT) { - kn->kn_status &= ~KN_QUEUED; splx(s); kn->kn_fop->f_detach(kn); knote_drop(kn, p); @@ -783,11 +851,19 @@ retry: } if (kn->kn_flags & EV_DISPATCH) kn->kn_status |= KN_DISABLED; - kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); + if ((kn->kn_status & KN_QUEUED) == 0) + kn->kn_status &= ~KN_ACTIVE; + knote_release(kn); } else { - TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); - kq->kq_count++; + if ((kn->kn_status & KN_QUEUED) == 0) { + kqueue_check(kq); + kq->kq_count++; + kn->kn_status |= KN_QUEUED; + TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); + } + knote_release(kn); } + kqueue_check(kq); count--; if (nkev == KQ_NEVENTS) { splx(s); @@ -805,7 +881,8 @@ retry: break; } } - TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe); + TAILQ_REMOVE(&kq->kq_head, &mend, kn_tqe); + TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe); splx(s); done: if (nkev != 0) { @@ -914,6 +991,45 @@ kqueue_wakeup(struct kqueue *kq) } /* + * Acquire a knote, return non-zero on success, 0 on failure. + * + * If we cannot acquire the knote we sleep and return 0. The knote + * may be stale on return in this case and the caller must restart + * whatever loop they are in. 
+ */ +int +knote_acquire(struct knote *kn) +{ + KASSERT(kn->kn_filter != EVFILT_MARKER); + + if (kn->kn_status & KN_PROCESSING) { + kn->kn_status |= KN_WAITING; + tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1)); + /* knote may be stale now */ + return (0); + } + kn->kn_status |= KN_PROCESSING; + return (1); +} + +/* + * Release an acquired knote, clearing KN_PROCESSING. + */ +void +knote_release(struct knote *kn) +{ + KASSERT(kn->kn_filter != EVFILT_MARKER); + KASSERT(kn->kn_status & KN_PROCESSING); + + if (kn->kn_status & KN_WAITING) { + kn->kn_status &= ~KN_WAITING; + wakeup(kn); + } + kn->kn_status &= ~KN_PROCESSING; + /* kn should not be accessed anymore */ +} + +/* * activate one knote. */ void @@ -946,6 +1062,8 @@ knote_remove(struct proc *p, struct klist *list) struct knote *kn; while ((kn = SLIST_FIRST(list)) != NULL) { + if (!knote_acquire(kn)) + continue; kn->kn_fop->f_detach(kn); knote_drop(kn, p); } @@ -1032,6 +1150,8 @@ knote_drop(struct knote *kn, struct proc *p) struct kqueue *kq = kn->kn_kq; struct klist *list; + KASSERT(kn->kn_filter != EVFILT_MARKER); + if (kn->kn_fop->f_isfd) list = &kq->kq_knlist[kn->kn_id]; else @@ -1052,11 +1172,14 @@ knote_enqueue(struct knote *kn) struct kqueue *kq = kn->kn_kq; int s = splhigh(); + KASSERT(kn->kn_filter != EVFILT_MARKER); KASSERT((kn->kn_status & KN_QUEUED) == 0); + kqueue_check(kq); TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); kn->kn_status |= KN_QUEUED; kq->kq_count++; + kqueue_check(kq); splx(s); kqueue_wakeup(kq); } @@ -1067,11 +1190,14 @@ knote_dequeue(struct knote *kn) struct kqueue *kq = kn->kn_kq; int s = splhigh(); + KASSERT(kn->kn_filter != EVFILT_MARKER); KASSERT(kn->kn_status & KN_QUEUED); + kqueue_check(kq); TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); kn->kn_status &= ~KN_QUEUED; kq->kq_count--; + kqueue_check(kq); splx(s); } diff --git a/sys/sys/event.h b/sys/sys/event.h index e29c1128975..909367aea33 100644 --- a/sys/sys/event.h +++ b/sys/sys/event.h @@ -1,4 +1,4 @@ -/* $OpenBSD: event.h,v 1.30 
2018/01/13 12:58:40 robert Exp $ */ +/* $OpenBSD: event.h,v 1.31 2019/12/12 16:31:06 visa Exp $ */ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> @@ -125,6 +125,8 @@ SLIST_HEAD(klist, knote); #ifdef _KERNEL +#define EVFILT_MARKER 0xf /* placemarker for tailq */ + /* * hint flag for in-kernel use - must not equal any existing note */ @@ -170,6 +172,8 @@ struct knote { #define KN_QUEUED 0x0002 /* event is on queue */ #define KN_DISABLED 0x0004 /* event is disabled */ #define KN_DETACHED 0x0008 /* knote is detached */ +#define KN_PROCESSING 0x0010 /* knote is being processed */ +#define KN_WAITING 0x0020 /* waiting on processing */ #define kn_id kn_kevent.ident #define kn_filter kn_kevent.filter |