summary | refs | log | tree | commit | diff
path: root/sys
diff options
context:
space:
mode:
author: Jonathan Matthew <jmatthew@cvs.openbsd.org> 2024-09-13 09:57:35 +0000
committer: Jonathan Matthew <jmatthew@cvs.openbsd.org> 2024-09-13 09:57:35 +0000
commit: 76570ae0ba04556208844af4261e39be7fee6156 (patch)
tree: 5c455f0e5ba0d0408720d6cc5ec62ddffdcf6b90 /sys
parent: fadaafebf2027a58aeae09f941f61487e16f28c0 (diff)
Add sensors based on information in the SMART/health log page,
showing overall device health and temperature.

tested by many (a while ago)
tweaks from gkoehler@ kettenis@ dv@
ok kettenis@ jca@ (earlier version), dlg@
Diffstat (limited to 'sys')
-rw-r--r-- sys/dev/ic/nvme.c 99
-rw-r--r-- sys/dev/ic/nvmereg.h 40
-rw-r--r-- sys/dev/ic/nvmevar.h 9
3 files changed, 145 insertions, 3 deletions
diff --git a/sys/dev/ic/nvme.c b/sys/dev/ic/nvme.c
index 7e74d293152..f54454b8db7 100644
--- a/sys/dev/ic/nvme.c
+++ b/sys/dev/ic/nvme.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nvme.c,v 1.122 2024/09/01 03:08:56 jsg Exp $ */
+/* $OpenBSD: nvme.c,v 1.123 2024/09/13 09:57:34 jmatthew Exp $ */
/*
* Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
@@ -60,6 +60,10 @@ void nvme_dumpregs(struct nvme_softc *);
int nvme_identify(struct nvme_softc *, u_int);
void nvme_fill_identify(struct nvme_softc *, struct nvme_ccb *, void *);
+#ifndef SMALL_KERNEL
+void nvme_refresh_sensors(void *);
+#endif
+
int nvme_ccbs_alloc(struct nvme_softc *, u_int);
void nvme_ccbs_free(struct nvme_softc *, u_int);
@@ -158,6 +162,7 @@ static const struct nvme_ops nvme_ops = {
#define NVME_TIMO_QOP 5000 /* ms to create/delete queue */
#define NVME_TIMO_PT 5000 /* ms to complete passthrough */
#define NVME_TIMO_IDENT 10000 /* ms to probe/identify */
+#define NVME_TIMO_LOG_PAGE 5000 /* ms to read log pages */
#define NVME_TIMO_DELAYNS 10 /* ns to delay() in poll loop */
/*
@@ -407,6 +412,31 @@ nvme_attach(struct nvme_softc *sc)
saa.saa_quirks = saa.saa_flags = 0;
saa.saa_wwpn = saa.saa_wwnn = 0;
+ strlcpy(sc->sc_sensordev.xname, DEVNAME(sc), sizeof(sc->sc_sensordev.xname));
+
+#ifndef SMALL_KERNEL
+ sc->sc_temp_sensor.type = SENSOR_TEMP;
+ sc->sc_temp_sensor.status = SENSOR_S_UNKNOWN;
+ sensor_attach(&sc->sc_sensordev, &sc->sc_temp_sensor);
+
+ sc->sc_usage_sensor.type = SENSOR_PERCENT;
+ sc->sc_usage_sensor.status = SENSOR_S_UNKNOWN;
+ strlcpy(sc->sc_usage_sensor.desc, "endurance used",
+ sizeof(sc->sc_usage_sensor.desc));
+ sensor_attach(&sc->sc_sensordev, &sc->sc_usage_sensor);
+
+ sc->sc_spare_sensor.type = SENSOR_PERCENT;
+ sc->sc_spare_sensor.status = SENSOR_S_UNKNOWN;
+ strlcpy(sc->sc_spare_sensor.desc, "available spare",
+ sizeof(sc->sc_spare_sensor.desc));
+ sensor_attach(&sc->sc_sensordev, &sc->sc_spare_sensor);
+
+ if (sensor_task_register(sc, nvme_refresh_sensors, 60) == NULL)
+ goto free_q;
+
+ sensordev_install(&sc->sc_sensordev);
+#endif
+
sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
&saa, scsiprint);
#if NBIO > 0
@@ -2128,3 +2158,70 @@ nvme_bioctl_disk(struct nvme_softc *sc, struct bioc_disk *bd)
return 0;
}
#endif /* NBIO > 0 */
+
+#ifndef SMALL_KERNEL
+void
+nvme_refresh_sensors(void *arg) /* fetch the SMART/health log page via nvme_poll() and update the temp, spare and usage sensors */
+{
+ struct nvme_softc *sc = arg; /* arg is the softc handed to sensor_task_register() in nvme_attach() */
+ struct nvme_sqe sqe;
+ struct nvme_dmamem *mem = NULL; /* stays NULL until allocated so the exit path knows whether to free it */
+ struct nvme_ccb *ccb = NULL; /* stays NULL until obtained so the exit path knows whether to put it back */
+ struct nvm_smart_health *health;
+ uint32_t dwlen;
+ uint8_t cw;
+ int flags;
+ int64_t temp;
+
+ ccb = nvme_ccb_get(sc);
+ if (ccb == NULL)
+ goto failed;
+
+ mem = nvme_dmamem_alloc(sc, sizeof(*health)); /* DMA buffer sized for exactly one health log structure */
+ if (mem == NULL)
+ goto failed;
+ nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
+
+ dwlen = (sizeof(*health) >> 2) - 1; /* buffer length in 32-bit words, minus one */
+ memset(&sqe, 0, sizeof(sqe));
+ sqe.opcode = NVM_ADMIN_GET_LOG_PG;
+ htolem32(&sqe.nsid, 0xffffffff); /* NOTE(review): presumably the "all namespaces" id - confirm against the NVMe spec */
+ htolem32(&sqe.cdw10, (dwlen << 16 | NVM_LOG_PAGE_SMART_HEALTH)); /* word count from bit 16 up, log page id in the low bits */
+ htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem)); /* device-visible address of the buffer the page is written to */
+
+ ccb->ccb_done = nvme_empty_done;
+ ccb->ccb_cookie = &sqe; /* nvme_sqe_fill is given this sqe through the ccb cookie */
+ flags = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_LOG_PAGE); /* submitted on the admin queue; any nonzero result is treated as failure below */
+
+ nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD); /* synced before the result check, so it also runs when the command failed */
+
+ if (flags != 0)
+ goto failed;
+
+ health = NVME_DMA_KVA(mem);
+ cw = health->critical_warning;
+
+ sc->sc_temp_sensor.status = (cw & NVM_HEALTH_CW_TEMP) ?
+ SENSOR_S_CRIT : SENSOR_S_OK;
+ temp = letoh16(health->temperature); /* widened to int64_t before the multiplication below */
+ sc->sc_temp_sensor.value = (temp * 1000000) + 150000; /* NOTE(review): presumably Kelvin to microkelvin, plus 0.15 K so readings land on whole degrees C - confirm */
+
+ sc->sc_spare_sensor.status = (cw & NVM_HEALTH_CW_SPARE) ?
+ SENSOR_S_CRIT : SENSOR_S_OK;
+ sc->sc_spare_sensor.value = health->avail_spare * 1000; /* NOTE(review): SENSOR_PERCENT presumably counts in 1/1000ths of a percent - confirm */
+
+ sc->sc_usage_sensor.status = SENSOR_S_OK; /* no critical-warning bit is consulted here; always OK on success */
+ sc->sc_usage_sensor.value = health->percent_used * 1000;
+ goto done;
+
+ failed: /* any failure above: mark all three sensors unknown */
+ sc->sc_temp_sensor.status = SENSOR_S_UNKNOWN;
+ sc->sc_usage_sensor.status = SENSOR_S_UNKNOWN;
+ sc->sc_spare_sensor.status = SENSOR_S_UNKNOWN;
+ done: /* common exit: release whatever was acquired */
+ if (mem != NULL)
+ nvme_dmamem_free(sc, mem);
+ if (ccb != NULL)
+ nvme_ccb_put(sc, ccb);
+}
+#endif /* SMALL_KERNEL */
diff --git a/sys/dev/ic/nvmereg.h b/sys/dev/ic/nvmereg.h
index 2a28c6af83e..84a4533b84c 100644
--- a/sys/dev/ic/nvmereg.h
+++ b/sys/dev/ic/nvmereg.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: nvmereg.h,v 1.15 2024/05/24 12:04:07 krw Exp $ */
+/* $OpenBSD: nvmereg.h,v 1.16 2024/09/13 09:57:34 jmatthew Exp $ */
/*
* Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
@@ -415,3 +415,41 @@ struct nvm_identify_namespace {
u_int8_t vs[3712];
} __packed __aligned(8);
+
+#define NVM_LOG_PAGE_SMART_HEALTH 0x02
+struct nvm_smart_health { /* in-memory layout of the SMART/health log page (NVM_LOG_PAGE_SMART_HEALTH) */
+ u_int8_t critical_warning; /* bitmask of the NVM_HEALTH_CW_* flags below */
+#define NVM_HEALTH_CW_SPARE (1 << 0)
+#define NVM_HEALTH_CW_TEMP (1 << 1)
+#define NVM_HEALTH_CW_MEDIA (1 << 2)
+#define NVM_HEALTH_CW_READONLY (1 << 3)
+#define NVM_HEALTH_CW_VOLATILE (1 << 4)
+#define NVM_HEALTH_CW_PMR (1 << 5)
+ u_int16_t temperature; /* unaligned at byte offset 1 (struct is packed); nvme_refresh_sensors() reads it with letoh16() */
+ u_int8_t avail_spare;
+ u_int8_t avail_spare_threshold;
+ u_int8_t percent_used;
+ u_int8_t end_grp_summary; /* 1.4+ */
+
+ u_int8_t _reserved1[25]; /* brings the following counters to byte offset 32 */
+
+ u_int64_t data_units_read[2]; /* this and the counters below occupy 16 bytes each, declared as two 64-bit words */
+ u_int64_t data_units_written[2];
+ u_int64_t host_read_commands[2];
+ u_int64_t host_write_commands[2];
+ u_int64_t busy_time[2];
+ u_int64_t power_cycles[2];
+ u_int64_t power_on_hours[2];
+ u_int64_t unsafe_shutdowns[2];
+ u_int64_t integrity_errors[2];
+ u_int64_t error_log_entries[2];
+ u_int32_t warn_temp_time; /* 1.2+ */
+ u_int32_t crit_temp_time; /* 1.2+ */
+ u_int16_t temp_sensors[8]; /* 1.2+ */
+ u_int32_t therm_mgmt_count_1; /* 1.3+ */
+ u_int32_t therm_mgmt_count_2; /* 1.3+ */
+ u_int32_t therm_mgmt_time_1; /* 1.3+ */
+ u_int32_t therm_mgmt_time_2; /* 1.3+ */
+
+ u_int8_t _reserved2[280]; /* pads the structure to 512 bytes in total */
+} __packed __aligned(8);
diff --git a/sys/dev/ic/nvmevar.h b/sys/dev/ic/nvmevar.h
index 15137057818..7fe0b26fd8f 100644
--- a/sys/dev/ic/nvmevar.h
+++ b/sys/dev/ic/nvmevar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: nvmevar.h,v 1.30 2024/06/26 21:41:30 asou Exp $ */
+/* $OpenBSD: nvmevar.h,v 1.31 2024/09/13 09:57:34 jmatthew Exp $ */
/*
* Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
@@ -16,6 +16,8 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+#include <sys/sensors.h>
+
#define NVME_IO_Q 1
#define NVME_HIB_Q 2
#define NVME_MAXPHYS (128 * 1024)
@@ -126,6 +128,11 @@ struct nvme_softc {
struct scsi_iopool sc_iopool;
struct rwlock sc_lock;
struct scsibus_softc *sc_scsibus;
+
+ struct ksensordev sc_sensordev;
+ struct ksensor sc_temp_sensor;
+ struct ksensor sc_spare_sensor;
+ struct ksensor sc_usage_sensor;
};
#define DEVNAME(_sc) ((_sc)->sc_dev.dv_xname)