diff options
author | Martin Pieuchot <mpi@cvs.openbsd.org> | 2024-11-02 16:59:23 +0000 |
---|---|---|
committer | Martin Pieuchot <mpi@cvs.openbsd.org> | 2024-11-02 16:59:23 +0000 |
commit | 326b0ae336c79018c8db575bfc8b45290e10581c (patch) | |
tree | 3137e564a1a4c2e47110b2921e22bbabcb4b7883 /sys/dev | |
parent | 2bbfb16e4c69ddccd92a2e2bbadcc01795ad1db8 (diff) |
Use a ringbuffer per CPU instead of a ringbuffer per PCB.
Get rid of the per-ringbuffer mutex. Use a variable to prevent against
recursion. Allow to process more events in the same timeframe.
From Christian Ludwig.
Diffstat (limited to 'sys/dev')
-rw-r--r-- | sys/dev/dt/dt_dev.c | 205 | ||||
-rw-r--r-- | sys/dev/dt/dtvar.h | 11 |
2 files changed, 132 insertions, 84 deletions
diff --git a/sys/dev/dt/dt_dev.c b/sys/dev/dt/dt_dev.c index 05ce734abf4..7fc183483d9 100644 --- a/sys/dev/dt/dt_dev.c +++ b/sys/dev/dt/dt_dev.c @@ -1,4 +1,4 @@ -/* $OpenBSD: dt_dev.c,v 1.37 2024/09/06 08:38:21 mpi Exp $ */ +/* $OpenBSD: dt_dev.c,v 1.38 2024/11/02 16:59:22 mpi Exp $ */ /* * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org> @@ -86,13 +86,34 @@ #define DPRINTF(x...) /* nothing */ /* + * Per-CPU Event States + * + * Locks used to protect struct members: + * r owned by read(2) proc + * c owned by CPU + * s sliced ownership, based on read/write indexes + * S written by CPU, read by read(2) proc + */ +struct dt_cpubuf { + unsigned int dc_prod; /* [r] read index */ + unsigned int dc_cons; /* [c] write index */ + struct dt_evt *dc_ring; /* [s] ring of event states */ + unsigned int dc_inevt; /* [c] in event already? */ + + /* Counters */ + unsigned int dc_dropevt; /* [S] # of events dropped */ + unsigned int dc_readevt; /* [r] # of events read */ +}; + +/* * Descriptor associated with each program opening /dev/dt. It is used * to keep track of enabled PCBs. * * Locks used to protect struct members in this file: * a atomic - * m per-softc mutex * K kernel lock + * r owned by read(2) proc + * I invariant after initialization */ struct dt_softc { SLIST_ENTRY(dt_softc) ds_next; /* [K] descriptor list */ @@ -100,15 +121,12 @@ struct dt_softc { pid_t ds_pid; /* [I] PID of tracing program */ void *ds_si; /* [I] to defer wakeup(9) */ - struct mutex ds_mtx; - struct dt_pcb_list ds_pcbs; /* [K] list of enabled PCBs */ int ds_recording; /* [K] currently recording? */ - int ds_evtcnt; /* [m] # of readable evts */ + unsigned int ds_evtcnt; /* [a] # of readable evts */ - /* Counters */ - uint64_t ds_readevt; /* [m] # of events read */ - uint64_t ds_dropevt; /* [m] # of events dropped */ + struct dt_cpubuf ds_cpu[MAXCPUS]; /* [I] Per-cpu event states */ + unsigned int ds_lastcpu; /* [r] last CPU ring read(2). */ }; SLIST_HEAD(, dt_softc) dtdev_list; /* [K] list of open /dev/dt nodes */ @@ -132,6 +150,8 @@ int dtread(dev_t, struct uio *, int); int dtioctl(dev_t, u_long, caddr_t, int, struct proc *); struct dt_softc *dtlookup(int); +struct dt_softc *dtalloc(void); +void dtfree(struct dt_softc *); int dt_ioctl_list_probes(struct dt_softc *, struct dtioc_probe *); int dt_ioctl_get_args(struct dt_softc *, struct dtioc_arg *); @@ -142,8 +162,7 @@ int dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *); int dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *); int dt_ioctl_get_auxbase(struct dt_softc *, struct dtioc_getaux *); -int dt_pcb_ring_copy(struct dt_pcb *, struct uio *, size_t, size_t *, - uint64_t *); +int dt_ring_copy(struct dt_cpubuf *, struct uio *, size_t, size_t *); void dt_wakeup(struct dt_softc *); void dt_deferred_wakeup(void *); @@ -172,26 +191,24 @@ dtopen(dev_t dev, int flags, int mode, struct proc *p) if (atomic_load_int(&allowdt) == 0) return EPERM; - sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); + sc = dtalloc(); if (sc == NULL) return ENOMEM; /* no sleep after this point */ if (dtlookup(unit) != NULL) { - free(sc, M_DEVBUF, sizeof(*sc)); + dtfree(sc); return EBUSY; } sc->ds_unit = unit; sc->ds_pid = p->p_p->ps_pid; TAILQ_INIT(&sc->ds_pcbs); - mtx_init(&sc->ds_mtx, IPL_HIGH); + sc->ds_lastcpu = 0; sc->ds_evtcnt = 0; - sc->ds_readevt = 0; - sc->ds_dropevt = 0; sc->ds_si = softintr_establish(IPL_SOFTCLOCK, dt_deferred_wakeup, sc); if (sc->ds_si == NULL) { - free(sc, M_DEVBUF, sizeof(*sc)); + dtfree(sc); return ENOMEM; } @@ -217,8 +234,7 @@ dtclose(dev_t dev, int flags, int mode, struct proc *p) dt_ioctl_record_stop(sc); dt_pcb_purge(&sc->ds_pcbs); softintr_disestablish(sc->ds_si); - - free(sc, M_DEVBUF, sizeof(*sc)); + dtfree(sc); return 0; } @@ -227,10 +243,9 @@ int dtread(dev_t dev, struct uio *uio, int flags) { struct dt_softc *sc; - struct dt_pcb *dp; - int error = 0, unit = minor(dev); + struct dt_cpubuf *dc; + int i, error = 0, unit = minor(dev); size_t count, max, read = 0; - uint64_t dropped = 0; sc = dtlookup(unit); KASSERT(sc != NULL); @@ -239,9 +254,9 @@ dtread(dev_t dev, struct uio *uio, int flags) if (max < 1) return (EMSGSIZE); - while (!sc->ds_evtcnt) { + while (!atomic_load_int(&sc->ds_evtcnt)) { sleep_setup(sc, PWAIT | PCATCH, "dtread"); - error = sleep_finish(0, !sc->ds_evtcnt); + error = sleep_finish(0, !atomic_load_int(&sc->ds_evtcnt)); if (error == EINTR || error == ERESTART) break; } @@ -249,9 +264,10 @@ dtread(dev_t dev, struct uio *uio, int flags) return error; KERNEL_ASSERT_LOCKED(); - TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) { + for (i = 0; i < ncpusfound; i++) { count = 0; - error = dt_pcb_ring_copy(dp, uio, max, &count, &dropped); + dc = &sc->ds_cpu[(sc->ds_lastcpu + i) % ncpusfound]; + error = dt_ring_copy(dc, uio, max, &count); if (error && count == 0) break; @@ -260,12 +276,9 @@ dtread(dev_t dev, struct uio *uio, int flags) if (max == 0) break; } + sc->ds_lastcpu += i % ncpusfound; - mtx_enter(&sc->ds_mtx); - sc->ds_evtcnt -= read; - sc->ds_readevt += read; - sc->ds_dropevt += dropped; - mtx_leave(&sc->ds_mtx); + atomic_sub_int(&sc->ds_evtcnt, read); return error; } @@ -339,6 +352,46 @@ dtlookup(int unit) return sc; } +struct dt_softc * +dtalloc(void) +{ + struct dt_softc *sc; + struct dt_evt *dtev; + int i; + + sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); + if (sc == NULL) + return NULL; + + for (i = 0; i < ncpusfound; i++) { + dtev = mallocarray(DT_EVTRING_SIZE, sizeof(*dtev), M_DT, + M_WAITOK|M_CANFAIL|M_ZERO); + if (dtev == NULL) + break; + sc->ds_cpu[i].dc_ring = dtev; + } + if (i < ncpusfound) { + dtfree(sc); + return NULL; + } + + return sc; +} + +void +dtfree(struct dt_softc *sc) +{ + struct dt_evt *dtev; + int i; + + for (i = 0; i < ncpusfound; i++) { + dtev = sc->ds_cpu[i].dc_ring; + if (dtev != NULL) + free(dtev, M_DT, DT_EVTRING_SIZE * sizeof(*dtev)); + } + free(sc, M_DEVBUF, sizeof(*sc)); +} + int dt_ioctl_list_probes(struct dt_softc *sc, struct dtioc_probe *dtpr) { @@ -434,11 +487,20 @@ dt_ioctl_get_args(struct dt_softc *sc, struct dtioc_arg *dtar) int dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst) { - mtx_enter(&sc->ds_mtx); - dtst->dtst_readevt = sc->ds_readevt; - dtst->dtst_dropevt = sc->ds_dropevt; - mtx_leave(&sc->ds_mtx); + struct dt_cpubuf *dc; + uint64_t readevt = 0, dropevt = 0; + int i; + for (i = 0; i < ncpusfound; i++) { + dc = &sc->ds_cpu[i]; + + membar_consumer(); + dropevt += dc->dc_dropevt; + readevt += dc->dc_readevt; + } + + dtst->dtst_readevt = readevt; + dtst->dtst_dropevt = dropevt; return 0; } @@ -637,28 +699,16 @@ dt_pcb_alloc(struct dt_probe *dtp, struct dt_softc *sc) dp = malloc(sizeof(*dp), M_DT, M_WAITOK|M_CANFAIL|M_ZERO); if (dp == NULL) - goto bad; - - dp->dp_ring = mallocarray(DT_EVTRING_SIZE, sizeof(*dp->dp_ring), M_DT, - M_WAITOK|M_CANFAIL|M_ZERO); - if (dp->dp_ring == NULL) - goto bad; + return NULL; - mtx_init(&dp->dp_mtx, IPL_HIGH); dp->dp_sc = sc; dp->dp_dtp = dtp; return dp; -bad: - dt_pcb_free(dp); - return NULL; } void dt_pcb_free(struct dt_pcb *dp) { - if (dp == NULL) - return; - free(dp->dp_ring, M_DT, DT_EVTRING_SIZE * sizeof(*dp->dp_ring)); free(dp, M_DT, sizeof(*dp)); } @@ -681,21 +731,31 @@ dt_pcb_ring_get(struct dt_pcb *dp, int profiling) { struct proc *p = curproc; struct dt_evt *dtev; - int distance; + int prod, cons, distance; + struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()]; - mtx_enter(&dp->dp_mtx); - distance = dp->dp_prod - dp->dp_cons; + if (dc->dc_inevt == 1) + return NULL; + + dc->dc_inevt = 1; + + membar_consumer(); + prod = dc->dc_prod; + cons = dc->dc_cons; + distance = prod - cons; if (distance == 1 || distance == (1 - DT_EVTRING_SIZE)) { /* read(2) isn't finished */ - dp->dp_dropevt++; - mtx_leave(&dp->dp_mtx); + dc->dc_dropevt++; + membar_producer(); + + dc->dc_inevt = 0; return NULL; } /* * Save states in next free event slot. */ - dtev = &dp->dp_ring[dp->dp_cons]; + dtev = &dc->dc_ring[cons]; memset(dtev, 0, sizeof(*dtev)); dtev->dtev_pbn = dp->dp_dtp->dtp_pbn; @@ -722,25 +782,25 @@ dt_pcb_ring_get(struct dt_pcb *dp, int profiling) void dt_pcb_ring_consume(struct dt_pcb *dp, struct dt_evt *dtev) { - MUTEX_ASSERT_LOCKED(&dp->dp_mtx); - KASSERT(dtev == &dp->dp_ring[dp->dp_cons]); + struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()]; + + KASSERT(dtev == &dc->dc_ring[dc->dc_cons]); + + dc->dc_cons = (dc->dc_cons + 1) % DT_EVTRING_SIZE; + membar_producer(); - dp->dp_cons = (dp->dp_cons + 1) % DT_EVTRING_SIZE; - mtx_leave(&dp->dp_mtx); + atomic_inc_int(&dp->dp_sc->ds_evtcnt); + dc->dc_inevt = 0; - mtx_enter(&dp->dp_sc->ds_mtx); - dp->dp_sc->ds_evtcnt++; - mtx_leave(&dp->dp_sc->ds_mtx); dt_wakeup(dp->dp_sc); } /* - * Copy at most `max' events from `dp', producing the same amount + * Copy at most `max' events from `dc', producing the same amount * of free slots. */ int -dt_pcb_ring_copy(struct dt_pcb *dp, struct uio *uio, size_t max, - size_t *rcvd, uint64_t *dropped) +dt_ring_copy(struct dt_cpubuf *dc, struct uio *uio, size_t max, size_t *rcvd) { size_t count, copied = 0; unsigned int cons, prod; @@ -748,12 +808,9 @@ dt_pcb_ring_copy(struct dt_pcb *dp, struct uio *uio, size_t max, KASSERT(max > 0); - mtx_enter(&dp->dp_mtx); - cons = dp->dp_cons; - prod = dp->dp_prod; - *dropped += dp->dp_dropevt; - dp->dp_dropevt = 0; - mtx_leave(&dp->dp_mtx); + membar_consumer(); + cons = dc->dc_cons; + prod = dc->dc_prod; if (cons < prod) count = DT_EVTRING_SIZE - prod; @@ -764,7 +821,7 @@ dt_pcb_ring_copy(struct dt_pcb *dp, struct uio *uio, size_t max, return 0; count = MIN(count, max); - error = uiomove(&dp->dp_ring[prod], count * sizeof(struct dt_evt), uio); + error = uiomove(&dc->dc_ring[prod], count * sizeof(struct dt_evt), uio); if (error) return error; copied += count; @@ -777,7 +834,7 @@ dt_pcb_ring_copy(struct dt_pcb *dp, struct uio *uio, size_t max, goto out; count = MIN(cons, (max - copied)); - error = uiomove(&dp->dp_ring[0], count * sizeof(struct dt_evt), uio); + error = uiomove(&dc->dc_ring[0], count * sizeof(struct dt_evt), uio); if (error) goto out; @@ -785,9 +842,9 @@ dt_pcb_ring_copy(struct dt_pcb *dp, struct uio *uio, size_t max, prod += count; out: - mtx_enter(&dp->dp_mtx); - dp->dp_prod = prod; - mtx_leave(&dp->dp_mtx); + dc->dc_readevt += copied; + dc->dc_prod = prod; + membar_producer(); *rcvd = copied; return error; diff --git a/sys/dev/dt/dtvar.h b/sys/dev/dt/dtvar.h index 5bb7ab0d771..c6cfa7f66b3 100644 --- a/sys/dev/dt/dtvar.h +++ b/sys/dev/dt/dtvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: dtvar.h,v 1.19 2024/04/06 11:18:02 mpi Exp $ */ +/* $OpenBSD: dtvar.h,v 1.20 2024/11/02 16:59:22 mpi Exp $ */ /* * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org> @@ -163,12 +163,6 @@ struct dt_pcb { SMR_SLIST_ENTRY(dt_pcb) dp_pnext; /* [K,S] next PCB per probe */ TAILQ_ENTRY(dt_pcb) dp_snext; /* [K] next PCB per softc */ - /* Event states ring */ - unsigned int dp_prod; /* [m] read index */ - unsigned int dp_cons; /* [m] write index */ - struct dt_evt *dp_ring; /* [m] ring of event states */ - struct mutex dp_mtx; - struct dt_softc *dp_sc; /* [I] related softc */ struct dt_probe *dp_dtp; /* [I] related probe */ uint64_t dp_evtflags; /* [I] event states to record */ @@ -177,9 +171,6 @@ struct dt_pcb { struct clockintr dp_clockintr; /* [D] profiling handle */ uint64_t dp_nsecs; /* [I] profiling period */ struct cpu_info *dp_cpu; /* [I] on which CPU */ - - /* Counters */ - uint64_t dp_dropevt; /* [m] # dropped event */ }; TAILQ_HEAD(dt_pcb_list, dt_pcb); |