| author | Visa Hankala <visa@cvs.openbsd.org> | 2021-11-06 05:48:48 +0000 |
|---|---|---|
| committer | Visa Hankala <visa@cvs.openbsd.org> | 2021-11-06 05:48:48 +0000 |
| commit | 5155a287d400e5d529dcae2c2a93f9d298b75463 (patch) | |
| tree | c1f5a757f678117b13ee00a41a74211b3e463406 /sys | |
| parent | 050fdf90f8fece4f71708c1c4b9da22eb0e54775 (diff) | |
Make kqread event filter MP-safe
Use the monitored kqueue's kq_lock to serialize kqueue and knote access.
Typically, the "object lock" would also cover the klist, but that is not
possible with kqueues: knote_activate() needs the kq_lock of the monitoring
kqueue, which would create lock order trouble if kq_lock were held when
calling KNOTE(&kq->kq_sel.si_note). Avoid this by using a separate klist
lock for kqueues, as modeled in the sketch below.
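To see why, picture two kqueues that monitor each other. The sketch below models the resulting lock order in userland with pthread mutexes; kq_model and kq_model_knote are made-up names for this illustration, not kernel API:

```c
/*
 * Illustrative userland model, not kernel code: pthread mutexes
 * stand in for kq_lock and the new kqueue_klist_lock.
 */
#include <pthread.h>
#include <stdio.h>

struct kq_model {
	pthread_mutex_t kq_lock;	/* stands in for kqueue's kq_lock */
	int kq_count;			/* pending events */
};

/* stands in for the system-wide kqueue_klist_lock */
static pthread_mutex_t klist_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Stands in for KNOTE(&monitored->kq_sel.si_note, 0): walk the
 * monitored kqueue's klist and activate the knote belonging to the
 * kqueue that watches it.  The klist is protected by klist_lock, so
 * no kq_lock is held while another kqueue's kq_lock is acquired;
 * the order is always klist_lock -> kq_lock.  If each kqueue's own
 * kq_lock protected its klist instead, two kqueues watching each
 * other would take the pair of kq_locks in opposite (ABBA) orders.
 */
static void
kq_model_knote(struct kq_model *monitored, struct kq_model *monitoring)
{
	int active;

	pthread_mutex_lock(&klist_lock);

	/* f_event: poll the monitored queue's state under its lock */
	pthread_mutex_lock(&monitored->kq_lock);
	active = monitored->kq_count > 0;
	pthread_mutex_unlock(&monitored->kq_lock);

	/* knote_activate(): needs the *monitoring* kqueue's kq_lock */
	if (active) {
		pthread_mutex_lock(&monitoring->kq_lock);
		monitoring->kq_count++;
		pthread_mutex_unlock(&monitoring->kq_lock);
	}

	pthread_mutex_unlock(&klist_lock);
}

int
main(void)
{
	struct kq_model a = { PTHREAD_MUTEX_INITIALIZER, 1 };
	struct kq_model b = { PTHREAD_MUTEX_INITIALIZER, 0 };

	/* a and b monitor each other; both directions stay safe */
	kq_model_knote(&a, &b);
	kq_model_knote(&b, &a);
	printf("a: %d pending, b: %d pending\n", a.kq_count, b.kq_count);
	return 0;
}
```

Because klist_lock is always taken first and no kq_lock is held while acquiring another, the a-then-b and b-then-a activations can no longer interleave into an ABBA deadlock.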
The new klist lock is system-wide. Each kqueue instance could have
a dedicated klist lock instead. However, a dedicated lock would gain
little over a system-wide one because the current implementation
activates kqueue knotes through a single thread.
OK mpi@
Diffstat (limited to 'sys')
| -rw-r--r-- | sys/kern/kern_event.c | 66 |

1 file changed, 60 insertions, 6 deletions
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index 030b20eba84..ab663547142 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: kern_event.c,v 1.169 2021/07/24 09:16:51 mpi Exp $	*/
+/*	$OpenBSD: kern_event.c,v 1.170 2021/11/06 05:48:47 visa Exp $	*/
 
 /*-
  * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
@@ -128,6 +128,9 @@ void	knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list,
 
 void	filt_kqdetach(struct knote *kn);
 int	filt_kqueue(struct knote *kn, long hint);
+int	filt_kqueuemodify(struct kevent *kev, struct knote *kn);
+int	filt_kqueueprocess(struct knote *kn, struct kevent *kev);
+int	filt_kqueue_common(struct knote *kn, struct kqueue *kq);
 int	filt_procattach(struct knote *kn);
 void	filt_procdetach(struct knote *kn);
 int	filt_proc(struct knote *kn, long hint);
@@ -140,10 +143,12 @@ int	filt_timerprocess(struct knote *kn, struct kevent *kev);
 void	filt_seltruedetach(struct knote *kn);
 
 const struct filterops kqread_filtops = {
-	.f_flags	= FILTEROP_ISFD,
+	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
 	.f_attach	= NULL,
 	.f_detach	= filt_kqdetach,
 	.f_event	= filt_kqueue,
+	.f_modify	= filt_kqueuemodify,
+	.f_process	= filt_kqueueprocess,
 };
 
 const struct filterops proc_filtops = {
@@ -171,6 +176,7 @@ const struct filterops timer_filtops = {
 
 struct	pool knote_pool;
 struct	pool kqueue_pool;
+struct	mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
 int kq_ntimeouts = 0;
 int kq_timeoutmax = (4 * 1024);
 
@@ -219,6 +225,7 @@ KQRELE(struct kqueue *kq)
 	free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize *
 	    sizeof(struct knlist));
 	hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT);
+	klist_free(&kq->kq_sel.si_note);
 	pool_put(&kqueue_pool, kq);
 }
 
@@ -254,7 +261,7 @@ kqueue_kqfilter(struct file *fp, struct knote *kn)
 		return (EINVAL);
 
 	kn->kn_fop = &kqread_filtops;
-	klist_insert_locked(&kq->kq_sel.si_note, kn);
+	klist_insert(&kq->kq_sel.si_note, kn);
 
 	return (0);
 }
@@ -263,18 +270,62 @@ filt_kqdetach(struct knote *kn)
 {
 	struct kqueue *kq = kn->kn_fp->f_data;
 
-	klist_remove_locked(&kq->kq_sel.si_note, kn);
+	klist_remove(&kq->kq_sel.si_note, kn);
+}
+
+int
+filt_kqueue_common(struct knote *kn, struct kqueue *kq)
+{
+	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
+
+	kn->kn_data = kq->kq_count;
+
+	return (kn->kn_data > 0);
 }
 
 int
 filt_kqueue(struct knote *kn, long hint)
 {
 	struct kqueue *kq = kn->kn_fp->f_data;
+	int active;
 
 	mtx_enter(&kq->kq_lock);
-	kn->kn_data = kq->kq_count;
+	active = filt_kqueue_common(kn, kq);
 	mtx_leave(&kq->kq_lock);
-	return (kn->kn_data > 0);
+
+	return (active);
+}
+
+int
+filt_kqueuemodify(struct kevent *kev, struct knote *kn)
+{
+	struct kqueue *kq = kn->kn_fp->f_data;
+	int active;
+
+	mtx_enter(&kq->kq_lock);
+	knote_modify(kev, kn);
+	active = filt_kqueue_common(kn, kq);
+	mtx_leave(&kq->kq_lock);
+
+	return (active);
+}
+
+int
+filt_kqueueprocess(struct knote *kn, struct kevent *kev)
+{
+	struct kqueue *kq = kn->kn_fp->f_data;
+	int active;
+
+	mtx_enter(&kq->kq_lock);
+	if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
+		active = 1;
+	else
+		active = filt_kqueue_common(kn, kq);
+	if (active)
+		knote_submit(kn, kev);
+	mtx_leave(&kq->kq_lock);
+
+	return (active);
 }
 
 int
@@ -821,6 +872,7 @@ kqueue_alloc(struct filedesc *fdp)
 	TAILQ_INIT(&kq->kq_head);
 	mtx_init(&kq->kq_lock, IPL_HIGH);
 	task_set(&kq->kq_task, kqueue_task, kq);
+	klist_init_mutex(&kq->kq_sel.si_note, &kqueue_klist_lock);
 
 	return (kq);
 }
@@ -1529,6 +1581,7 @@ kqueue_task(void *arg)
 	/* Kernel lock is needed inside selwakeup(). */
 	KERNEL_ASSERT_LOCKED();
 
+	mtx_enter(&kqueue_klist_lock);
 	mtx_enter(&kq->kq_lock);
 	if (kq->kq_state & KQ_SEL) {
 		kq->kq_state &= ~KQ_SEL;
@@ -1538,6 +1591,7 @@ kqueue_task(void *arg)
 		mtx_leave(&kq->kq_lock);
 		KNOTE(&kq->kq_sel.si_note, 0);
 	}
+	mtx_leave(&kqueue_klist_lock);
 	KQRELE(kq);
 }
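For reference, kqread_filtops is the filter behind the scenario this change targets: one kqueue registered on another with EVFILT_READ. A minimal userland program that exercises the path (plain kqueue(2)/kevent(2) usage; nothing here is specific to this commit):

```c
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct kevent kev;
	int fds[2], inner, outer;

	if (pipe(fds) == -1)
		err(1, "pipe");
	if ((inner = kqueue()) == -1 || (outer = kqueue()) == -1)
		err(1, "kqueue");

	/* Watch the pipe's read end on the inner kqueue. */
	EV_SET(&kev, fds[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
	if (kevent(inner, &kev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent: add pipe");

	/*
	 * Monitor the inner kqueue from the outer one.  This attaches
	 * a knote with kqread_filtops, the filter made MP-safe above.
	 */
	EV_SET(&kev, inner, EVFILT_READ, EV_ADD, 0, 0, NULL);
	if (kevent(outer, &kev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent: add inner kqueue");

	/* Give the inner kqueue a pending event... */
	if (write(fds[1], "x", 1) == -1)
		err(1, "write");

	/*
	 * ...and the outer kqueue reports the inner one as readable.
	 * kev.data mirrors kn_data, i.e. the inner queue's kq_count.
	 */
	if (kevent(outer, NULL, 0, &kev, 1, NULL) == -1)
		err(1, "kevent: wait");
	printf("inner kqueue has %lld pending event(s)\n",
	    (long long)kev.data);
	return 0;
}
```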