summaryrefslogtreecommitdiff
path: root/sys/kern
diff options
context:
space:
mode:
authorDavid Gwynne <dlg@cvs.openbsd.org>2015-09-01 03:47:59 +0000
committerDavid Gwynne <dlg@cvs.openbsd.org>2015-09-01 03:47:59 +0000
commit51de94583baafa8826ed6a8830f9d5e826be4a6b (patch)
tree18da926e9f972c1ab8d6a78e7a248ec3adb4b19f /sys/kern
parent7a9d941b69731bf9e42e4752c23c6b08d16e0b10 (diff)
mattieu baptiste reported a problem with bpf+srps where the per cpu
hazard pointers were becoming corrupt and therefore panics. the problem turned out to be that bridge_input calls if_input on behalf of a hardware interface which then calls bpf_mtap at splsoftnet, while the actual hardware nic calls if_input and bpf_mtap at splnet. the hardware interrupts ran in the middle of the bpf calls bridge runs at softnet. this means the same srps are being entered and left on the same cpu at different ipls, which led to races because of the order of operations on the per cpu hazard pointers. after a lot of experimentation, jmatthew@ figured out how to deal with this problem without introducing per cpu critical sections (ie, splhigh) calls in srp_enter and srp_leave, and without introducing atomic operations. the solution is to iterate forward through the array of hazard pointers in srp_enter, and backward in srp_leave to clear. if you guarantee that you leave srps in the reverse order to entering them, then you can use the same set of SRPs at different IPLs on the same CPU. the ordering requirement is a problem if we want to build linked data structures out of srps because you need to hold a ref to the current element containing the next srp to use it, before giving up the current ref. we're adding srp_follow() to support taking the next ref and giving up the current one while preserving the structure of the hazard pointer list. srp_follow() does this by reusing the hazard pointer for the current reference for the next ref. both mattieu baptiste and jmatthew@ have been hitting this pretty hard with a tweaked version of srp+bpf that uses srp_follow instead of interleaved srp_enter/srp_leave sequences. neither can reproduce the panics anymore. thanks to mattieu for the report and tests ok jmatthew@
Diffstat (limited to 'sys/kern')
-rw-r--r--sys/kern/kern_srp.c61
1 files changed, 42 insertions, 19 deletions
diff --git a/sys/kern/kern_srp.c b/sys/kern/kern_srp.c
index dcd08742558..02e607945d3 100644
--- a/sys/kern/kern_srp.c
+++ b/sys/kern/kern_srp.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_srp.c,v 1.1 2015/07/02 01:34:00 dlg Exp $ */
+/* $OpenBSD: kern_srp.c,v 1.2 2015/09/01 03:47:58 dlg Exp $ */
/*
* Copyright (c) 2014 Jonathan Matthew <jmatthew@openbsd.org>
@@ -187,24 +187,12 @@ srp_finalize(struct srp_gc *srp_gc)
}
}
-void *
-srp_enter(struct srp *srp)
+static inline void *
+srp_v(struct srp_hazard *hzrd, struct srp *srp)
{
- struct cpu_info *ci = curcpu();
- struct srp_hazard *hzrd;
void *v;
- u_int i;
-
- for (i = 0; i < nitems(ci->ci_srp_hazards); i++) {
- hzrd = &ci->ci_srp_hazards[i];
- if (hzrd->sh_p == NULL)
- break;
- }
- if (__predict_false(i == nitems(ci->ci_srp_hazards)))
- panic("%s: not enough srp hazard records", __func__);
hzrd->sh_p = srp;
- membar_producer();
/*
* ensure we update this cpu's hazard pointer to a value that's still
@@ -220,8 +208,8 @@ srp_enter(struct srp *srp)
return (v);
}
-void
-srp_leave(struct srp *srp, void *v)
+void *
+srp_enter(struct srp *srp)
{
struct cpu_info *ci = curcpu();
struct srp_hazard *hzrd;
@@ -229,9 +217,44 @@ srp_leave(struct srp *srp, void *v)
for (i = 0; i < nitems(ci->ci_srp_hazards); i++) {
hzrd = &ci->ci_srp_hazards[i];
- if (hzrd->sh_p == srp) {
+ if (hzrd->sh_p == NULL)
+ return (srp_v(hzrd, srp));
+ }
+
+ panic("%s: not enough srp hazard records", __func__);
+
+ /* NOTREACHED */
+ return (NULL);
+}
+
+void *
+srp_follow(struct srp *srp, void *v, struct srp *next)
+{
+ struct cpu_info *ci = curcpu();
+ struct srp_hazard *hzrd;
+
+ hzrd = ci->ci_srp_hazards + nitems(ci->ci_srp_hazards);
+ while (hzrd-- != ci->ci_srp_hazards) {
+ if (hzrd->sh_p == srp && hzrd->sh_v == v)
+ return (srp_v(hzrd, next));
+ }
+
+ panic("%s: unexpected ref %p via %p", __func__, v, srp);
+
+ /* NOTREACHED */
+ return (NULL);
+}
+
+void
+srp_leave(struct srp *srp, void *v)
+{
+ struct cpu_info *ci = curcpu();
+ struct srp_hazard *hzrd;
+
+ hzrd = ci->ci_srp_hazards + nitems(ci->ci_srp_hazards);
+ while (hzrd-- != ci->ci_srp_hazards) {
+ if (hzrd->sh_p == srp && hzrd->sh_v == v) {
hzrd->sh_p = NULL;
- hzrd->sh_v = NULL;
return;
}
}