author    Visa Hankala <visa@cvs.openbsd.org>    2021-11-06 05:48:48 +0000
committer Visa Hankala <visa@cvs.openbsd.org>    2021-11-06 05:48:48 +0000
commit    5155a287d400e5d529dcae2c2a93f9d298b75463 (patch)
tree      c1f5a757f678117b13ee00a41a74211b3e463406 /sys
parent    050fdf90f8fece4f71708c1c4b9da22eb0e54775 (diff)
Make kqread event filter MP-safe
Use the monitored kqueue's kq_lock to serialize kqueue and knote access. Typically, the "object lock" would also cover the klist, but that is not possible with kqueues: knote_activate() needs the kq_lock of the monitoring kqueue, which would create lock order troubles if kq_lock was held when calling KNOTE(&kq->kq_sel.si_note). Avoid this by using a separate klist lock for kqueues.

The new klist lock is system-wide. Each kqueue instance could have a dedicated klist lock, but the efficacy of a dedicated versus a system-wide lock is somewhat limited because the current implementation activates kqueue knotes through a single thread.

OK mpi@
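In short, the change hangs every kqueue's kq_sel.si_note klist off one shared mutex instead of the per-kqueue kq_lock. The following condensed sketch, assembled from the hunks below (not standalone compilable kernel code), shows the pattern:

    /* One system-wide mutex protects every kqueue's kq_sel.si_note klist. */
    struct mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);

    /* kqueue_alloc(): tie the klist to the shared mutex instead of kq_lock. */
    klist_init_mutex(&kq->kq_sel.si_note, &kqueue_klist_lock);

    /*
     * kqueue_task(): wake up any monitoring kqueue. KNOTE() runs with
     * kqueue_klist_lock held rather than this kqueue's kq_lock, so
     * knote_activate() can take the monitoring kqueue's kq_lock without
     * creating a lock order reversal between two kq_locks.
     */
    mtx_enter(&kqueue_klist_lock);
    KNOTE(&kq->kq_sel.si_note, 0);
    mtx_leave(&kqueue_klist_lock);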
Diffstat (limited to 'sys')
-rw-r--r--  sys/kern/kern_event.c | 66
1 file changed, 60 insertions(+), 6 deletions(-)
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index 030b20eba84..ab663547142 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_event.c,v 1.169 2021/07/24 09:16:51 mpi Exp $ */
+/* $OpenBSD: kern_event.c,v 1.170 2021/11/06 05:48:47 visa Exp $ */
/*-
* Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
@@ -128,6 +128,9 @@ void knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list,
void filt_kqdetach(struct knote *kn);
int filt_kqueue(struct knote *kn, long hint);
+int filt_kqueuemodify(struct kevent *kev, struct knote *kn);
+int filt_kqueueprocess(struct knote *kn, struct kevent *kev);
+int filt_kqueue_common(struct knote *kn, struct kqueue *kq);
int filt_procattach(struct knote *kn);
void filt_procdetach(struct knote *kn);
int filt_proc(struct knote *kn, long hint);
@@ -140,10 +143,12 @@ int filt_timerprocess(struct knote *kn, struct kevent *kev);
void filt_seltruedetach(struct knote *kn);
const struct filterops kqread_filtops = {
- .f_flags = FILTEROP_ISFD,
+ .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
.f_attach = NULL,
.f_detach = filt_kqdetach,
.f_event = filt_kqueue,
+ .f_modify = filt_kqueuemodify,
+ .f_process = filt_kqueueprocess,
};
const struct filterops proc_filtops = {
@@ -171,6 +176,7 @@ const struct filterops timer_filtops = {
struct pool knote_pool;
struct pool kqueue_pool;
+struct mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
int kq_ntimeouts = 0;
int kq_timeoutmax = (4 * 1024);
@@ -219,6 +225,7 @@ KQRELE(struct kqueue *kq)
free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize *
sizeof(struct knlist));
hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT);
+ klist_free(&kq->kq_sel.si_note);
pool_put(&kqueue_pool, kq);
}
@@ -254,7 +261,7 @@ kqueue_kqfilter(struct file *fp, struct knote *kn)
return (EINVAL);
kn->kn_fop = &kqread_filtops;
- klist_insert_locked(&kq->kq_sel.si_note, kn);
+ klist_insert(&kq->kq_sel.si_note, kn);
return (0);
}
@@ -263,18 +270,62 @@ filt_kqdetach(struct knote *kn)
{
struct kqueue *kq = kn->kn_fp->f_data;
- klist_remove_locked(&kq->kq_sel.si_note, kn);
+ klist_remove(&kq->kq_sel.si_note, kn);
+}
+
+int
+filt_kqueue_common(struct knote *kn, struct kqueue *kq)
+{
+ MUTEX_ASSERT_LOCKED(&kq->kq_lock);
+
+ kn->kn_data = kq->kq_count;
+
+ return (kn->kn_data > 0);
}
int
filt_kqueue(struct knote *kn, long hint)
{
struct kqueue *kq = kn->kn_fp->f_data;
+ int active;
mtx_enter(&kq->kq_lock);
- kn->kn_data = kq->kq_count;
+ active = filt_kqueue_common(kn, kq);
mtx_leave(&kq->kq_lock);
- return (kn->kn_data > 0);
+
+ return (active);
+}
+
+int
+filt_kqueuemodify(struct kevent *kev, struct knote *kn)
+{
+ struct kqueue *kq = kn->kn_fp->f_data;
+ int active;
+
+ mtx_enter(&kq->kq_lock);
+ knote_modify(kev, kn);
+ active = filt_kqueue_common(kn, kq);
+ mtx_leave(&kq->kq_lock);
+
+ return (active);
+}
+
+int
+filt_kqueueprocess(struct knote *kn, struct kevent *kev)
+{
+ struct kqueue *kq = kn->kn_fp->f_data;
+ int active;
+
+ mtx_enter(&kq->kq_lock);
+ if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
+ active = 1;
+ else
+ active = filt_kqueue_common(kn, kq);
+ if (active)
+ knote_submit(kn, kev);
+ mtx_leave(&kq->kq_lock);
+
+ return (active);
}
int
@@ -821,6 +872,7 @@ kqueue_alloc(struct filedesc *fdp)
TAILQ_INIT(&kq->kq_head);
mtx_init(&kq->kq_lock, IPL_HIGH);
task_set(&kq->kq_task, kqueue_task, kq);
+ klist_init_mutex(&kq->kq_sel.si_note, &kqueue_klist_lock);
return (kq);
}
@@ -1529,6 +1581,7 @@ kqueue_task(void *arg)
/* Kernel lock is needed inside selwakeup(). */
KERNEL_ASSERT_LOCKED();
+ mtx_enter(&kqueue_klist_lock);
mtx_enter(&kq->kq_lock);
if (kq->kq_state & KQ_SEL) {
kq->kq_state &= ~KQ_SEL;
@@ -1538,6 +1591,7 @@ kqueue_task(void *arg)
mtx_leave(&kq->kq_lock);
KNOTE(&kq->kq_sel.si_note, 0);
}
+ mtx_leave(&kqueue_klist_lock);
KQRELE(kq);
}