summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Gwynne <dlg@cvs.openbsd.org>2020-07-06 03:56:52 +0000
committerDavid Gwynne <dlg@cvs.openbsd.org>2020-07-06 03:56:52 +0000
commit3e050f6d7149fc0653707ceba8ea50fd0eeef516 (patch)
tree2f4e656a98b5c35c37377a2c80d54cc0a1649a90
parentc3cc4803f315c77e9c28de5cd68b11a918d0745e (diff)
add kstat(4), a subsystem to let the kernel expose statistics to userland.
a kstat is an arbitrary chunk of data that a part of the kernel wants to expose to userland. data could mean just a chunk of raw bytes, but generally a kernel subsystem will provide a series of kstat key/value chunks. this code is loosely modelled on kstat in solaris, but with a bunch of simplifications (we don't want to provide write support for example). the named or key/value structure is significantly richer in this version too. eg, solaris kstat named data supports integer types, but this version offers differentiation between counters (like the number of packets transmitted on an interface) and gauges (like how long the transmit queue is) and lets kernel providers say what the units are (eg, packets vs bytes vs cycles). the main motivation for this is to improve the visibility of what the kernel is doing while it's running. i wrote this as part of the recent work we've been doing on multiqueue and rss/toeplitz so i could verify that network load is actually spread across multiple rings on a single nic. without this we would be wasting memory and interrupt vectors on multiple rings and still just using the 1st one, and no one would know cos there's no way to see what rings are being used. another thing that can become visible is the different counters that various network cards provide. i'm particularly interested in seeing if packets get dropped because the rings aren't filled fully, which is an effect we've never really observed directly. a small part of wanting this is cos i spend an annoying amount of time instrumenting the kernel when hacking code in it. if most of the scaffolding for the instrumentation is already there, i can avoid repeatedly writing that code and save time. iterated a few times with claudio@ and deraadt@
-rw-r--r--sys/dev/kstat.c689
-rw-r--r--sys/sys/kstat.h193
2 files changed, 882 insertions, 0 deletions
diff --git a/sys/dev/kstat.c b/sys/dev/kstat.c
new file mode 100644
index 00000000000..11e3ed8dd0d
--- /dev/null
+++ b/sys/dev/kstat.c
@@ -0,0 +1,689 @@
+/* $OpenBSD: kstat.c,v 1.1 2020/07/06 03:56:51 dlg Exp $ */
+
+/*
+ * Copyright (c) 2020 David Gwynne <dlg@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/pool.h>
+#include <sys/time.h>
+
+/* for kstat_set_cpu */
+#include <sys/proc.h>
+#include <sys/sched.h>
+
+#include <sys/kstat.h>
+
+RBT_HEAD(kstat_id_tree, kstat);
+
+/*
+ * Order two kstats by their ks_id.
+ * Returns -1, 0 or 1 when a sorts before, equal to, or after b.
+ */
+static inline int
+kstat_id_cmp(const struct kstat *a, const struct kstat *b)
+{
+	if (a->ks_id < b->ks_id)
+		return (-1);
+
+	return (a->ks_id > b->ks_id ? 1 : 0);
+}
+
+RBT_PROTOTYPE(kstat_id_tree, kstat, ks_id_entry, kstat_id_cmp);
+
+RBT_HEAD(kstat_pv_tree, kstat);
+
+/*
+ * Order two kstats by provider string, then instance, then name string,
+ * then unit.  String differences return the strcmp() result; numeric
+ * differences return -1 or 1.  Returns 0 when all four fields match.
+ */
+static inline int
+kstat_pv_cmp(const struct kstat *a, const struct kstat *b)
+{
+	int diff;
+
+	if ((diff = strcmp(a->ks_provider, b->ks_provider)) != 0)
+		return (diff);
+	if (a->ks_instance != b->ks_instance)
+		return (a->ks_instance > b->ks_instance ? 1 : -1);
+
+	if ((diff = strcmp(a->ks_name, b->ks_name)) != 0)
+		return (diff);
+	if (a->ks_unit != b->ks_unit)
+		return (a->ks_unit > b->ks_unit ? 1 : -1);
+
+	return (0);
+}
+
+RBT_PROTOTYPE(kstat_pv_tree, kstat, ks_pv_entry, kstat_pv_cmp);
+
+RBT_HEAD(kstat_nm_tree, kstat);
+
+/*
+ * Order two kstats by name string, then unit, then provider string,
+ * then instance.  String differences return the strcmp() result; numeric
+ * differences return -1 or 1.  Returns 0 when all four fields match.
+ */
+static inline int
+kstat_nm_cmp(const struct kstat *a, const struct kstat *b)
+{
+	int diff;
+
+	if ((diff = strcmp(a->ks_name, b->ks_name)) != 0)
+		return (diff);
+	if (a->ks_unit != b->ks_unit)
+		return (a->ks_unit > b->ks_unit ? 1 : -1);
+
+	if ((diff = strcmp(a->ks_provider, b->ks_provider)) != 0)
+		return (diff);
+	if (a->ks_instance != b->ks_instance)
+		return (a->ks_instance > b->ks_instance ? 1 : -1);
+
+	return (0);
+}
+
+RBT_PROTOTYPE(kstat_nm_tree, kstat, ks_nm_entry, kstat_nm_cmp);
+
+struct kstat_lock_ops { /* pair of callbacks that take and release a kstat's ks_lock */
+ void (*enter)(void *); /* invoked by kstat_enter() with ks_lock as argument */
+ void (*leave)(void *); /* invoked by kstat_leave() with ks_lock as argument */
+};
+
+#define kstat_enter(_ks) (_ks)->ks_lock_ops->enter((_ks)->ks_lock) /* take the kstat's own lock */
+#define kstat_leave(_ks) (_ks)->ks_lock_ops->leave((_ks)->ks_lock) /* release the kstat's own lock */
+
+/* lock ops selected by kstat_set_rlock(): ks_lock is handed to rw_enter_read */
+const struct kstat_lock_ops kstat_rlock_ops = {
+	.enter = (void (*)(void *))rw_enter_read,
+	.leave = (void (*)(void *))rw_exit_read,
+};
+
+/* lock ops selected by kstat_set_wlock(): ks_lock is handed to rw_enter_write */
+const struct kstat_lock_ops kstat_wlock_ops = {
+	.enter = (void (*)(void *))rw_enter_write,
+	.leave = (void (*)(void *))rw_exit_write,
+};
+
+/* lock ops selected by kstat_set_mutex(): ks_lock is handed to mtx_enter */
+const struct kstat_lock_ops kstat_mutex_ops = {
+	.enter = (void (*)(void *))mtx_enter,
+	.leave = (void (*)(void *))mtx_leave,
+};
+
+void	kstat_cpu_enter(void *);
+void	kstat_cpu_leave(void *);
+
+/* lock ops selected by kstat_set_cpu(): ks_lock is a struct cpu_info pointer */
+const struct kstat_lock_ops kstat_cpu_ops = {
+	.enter = kstat_cpu_enter,
+	.leave = kstat_cpu_leave,
+};
+
+struct rwlock kstat_lock = RWLOCK_INITIALIZER("kstat"); /* held for write when the trees, kstat_version or kstat_next_id change */
+
+/*
+ * The global state is versioned so changes to the set of kstats
+ * can be detected. This is an int so it can be read atomically on
+ * any arch, which is a ridiculous optimisation, really.
+ *
+ * It is incremented by kstat_create() and kstat_destroy() while
+ * kstat_lock is write held.
+ */
+unsigned int kstat_version = 0;
+
+/*
+ * kstat structures have a unique identifier so they can be found
+ * quickly. Identifiers are 64bit in the hope that it won't wrap
+ * during the runtime of a system. The identifiers start at 1 so that
+ * 0 can be used as the first value for userland to iterate with.
+ */
+uint64_t kstat_next_id = 1;
+
+struct kstat_id_tree kstat_id_tree = RBT_INITIALIZER(); /* kstats ordered by kstat_id_cmp */
+struct kstat_pv_tree kstat_pv_tree = RBT_INITIALIZER(); /* kstats ordered by kstat_pv_cmp */
+struct kstat_nm_tree kstat_nm_tree = RBT_INITIALIZER(); /* kstats ordered by kstat_nm_cmp */
+struct pool kstat_pool; /* allocator for struct kstat, set up by kstat_init() */
+
+struct rwlock kstat_default_lock = RWLOCK_INITIALIZER("kstatlk"); /* ks_lock assigned by kstat_create() */
+
+int kstat_read(struct kstat *); /* ks_read assigned by kstat_create() */
+int kstat_copy(struct kstat *, void *); /* ks_copy assigned by kstat_create() */
+
+/*
+ * Attach hook for the kstat device.  Currently does nothing with num
+ * and always reports success.
+ */
+int
+kstatattach(int num)
+{
+	/* XXX install system stats here */
+
+	return (0);
+}
+
+/*
+ * Open entry point for the kstat device.  No per-open state is set up;
+ * every open succeeds.
+ */
+int
+kstatopen(dev_t dev, int flag, int mode, struct proc *p)
+{
+	int error = 0;
+
+	return (error);
+}
+
+/*
+ * Close entry point for the kstat device.  Nothing needs tearing down;
+ * every close succeeds.
+ */
+int
+kstatclose(dev_t dev, int flag, int mode, struct proc *p)
+{
+	int error = 0;
+
+	return (error);
+}
+
+/*
+ * Begin a lookup ioctl: take kstat_lock for reading (interruptibly) and,
+ * unless the request sets KSTATIOC_F_IGNVER, check that the caller's
+ * ks_version matches kstat_version.
+ *
+ * Returns 0 with kstat_lock read held, or an errno value with the lock
+ * not held (the rw_enter() error, or EINVAL on a version mismatch).
+ */
+int
+kstatioc_enter(struct kstat_req *ksreq)
+{
+	int rv;
+
+	rv = rw_enter(&kstat_lock, RW_READ | RW_INTR);
+	if (rv != 0)
+		return (rv);
+
+	if (ISSET(ksreq->ks_rflags, KSTATIOC_F_IGNVER) ||
+	    ksreq->ks_version == kstat_version)
+		return (0);
+
+	/* the caller's view of the kstat set is stale */
+	rw_exit(&kstat_lock);
+
+	return (EINVAL);
+}
+
+/*
+ * Finish a lookup ioctl.  Fills ksreq from ks, releases kstat_lock, and
+ * copies the kstat's data out to ksreq->ks_data when the kstat is
+ * installed and userland supplied a buffer.
+ *
+ * ks is the result of the tree lookup and may be NULL, in which case
+ * ENOENT is returned.  kstat_lock is released on every path.
+ *
+ * Returns 0 on success or an errno value (ENOENT, ENOMEM, an error from
+ * the kstat's ks_read/ks_copy handlers, or an error from copyout()).
+ */
+int
+kstatioc_leave(struct kstat_req *ksreq, struct kstat *ks)
+{
+	void *buf = NULL;
+	size_t klen = 0, ulen = 0;
+	struct timespec updated;
+	int error = 0;
+
+	if (ks == NULL) {
+		error = ENOENT;
+		goto error;
+	}
+
+	switch (ks->ks_state) {
+	case KSTAT_S_CREATED:
+		/* not installed yet, so report an empty data set */
+		ksreq->ks_updated = ks->ks_created;
+		ksreq->ks_interval.tv_sec = 0;
+		ksreq->ks_interval.tv_nsec = 0;
+		ksreq->ks_datalen = 0;
+		ksreq->ks_dataver = 0;
+		break;
+
+	case KSTAT_S_INSTALLED:
+		ksreq->ks_dataver = ks->ks_dataver;
+		ksreq->ks_interval = ks->ks_interval;
+
+		if (ksreq->ks_data == NULL) {
+			/* userland doesn't want actual data, so shortcut */
+			kstat_enter(ks);
+			ksreq->ks_datalen = ks->ks_datalen;
+			ksreq->ks_updated = ks->ks_updated;
+			kstat_leave(ks);
+			break;
+		}
+
+		/* allocate the bounce buffer before taking the kstat's lock */
+		klen = ks->ks_datalen; /* KSTAT_F_REALLOC */
+		buf = malloc(klen, M_TEMP, M_WAITOK|M_CANFAIL);
+		if (buf == NULL) {
+			error = ENOMEM;
+			goto error;
+		}
+
+		kstat_enter(ks);
+		error = (*ks->ks_read)(ks);
+		if (error == 0) {
+			updated = ks->ks_updated;
+
+			/* KSTAT_F_REALLOC */
+			KASSERTMSG(ks->ks_datalen == klen,
+			    "kstat doesnt support resized data yet");
+
+			error = (*ks->ks_copy)(ks, buf);
+		}
+		kstat_leave(ks);
+
+		if (error != 0)
+			goto error;
+
+		/* remember the user's buffer size before overwriting it */
+		ulen = ksreq->ks_datalen;
+		ksreq->ks_datalen = klen; /* KSTAT_F_REALLOC */
+		ksreq->ks_updated = updated;
+		break;
+	default:
+		panic("ks %p unexpected state %u", ks, ks->ks_state);
+	}
+
+	ksreq->ks_version = kstat_version;
+	ksreq->ks_id = ks->ks_id;
+
+	if (strlcpy(ksreq->ks_provider, ks->ks_provider,
+	    sizeof(ksreq->ks_provider)) >= sizeof(ksreq->ks_provider))
+		panic("kstat %p provider string has grown", ks);
+	ksreq->ks_instance = ks->ks_instance;
+	if (strlcpy(ksreq->ks_name, ks->ks_name,
+	    sizeof(ksreq->ks_name)) >= sizeof(ksreq->ks_name))
+		panic("kstat %p name string has grown", ks);
+	ksreq->ks_unit = ks->ks_unit;
+
+	ksreq->ks_created = ks->ks_created;
+	ksreq->ks_type = ks->ks_type;
+	ksreq->ks_state = ks->ks_state;
+
+error:
+	rw_exit(&kstat_lock);
+
+	if (buf != NULL) {
+		/*
+		 * The copy to userland happens after kstat_lock is dropped.
+		 * Both lengths are size_t; the libkern min() function takes
+		 * u_int arguments and would truncate them on 64bit archs,
+		 * so the type-preserving MIN() macro is used instead.
+		 */
+		if (error == 0)
+			error = copyout(buf, ksreq->ks_data, MIN(klen, ulen));
+
+		free(buf, M_TEMP, klen);
+	}
+
+	return (error);
+}
+
+/*
+ * KSTATIOC_FIND_ID: exact lookup of a kstat by ksreq->ks_id.
+ * Returns 0 or an errno value from kstatioc_enter()/kstatioc_leave().
+ */
+int
+kstatioc_find_id(struct kstat_req *ksreq)
+{
+	struct kstat key, *found;
+	int rv;
+
+	if ((rv = kstatioc_enter(ksreq)) != 0)
+		return (rv);
+
+	/* only the field the id comparator looks at is filled in */
+	key.ks_id = ksreq->ks_id;
+	found = RBT_FIND(kstat_id_tree, &kstat_id_tree, &key);
+
+	return (kstatioc_leave(ksreq, found));
+}
+
+/*
+ * KSTATIOC_NFIND_ID: RBT_NFIND lookup in the id tree using ksreq->ks_id
+ * as the key.
+ * Returns 0 or an errno value from kstatioc_enter()/kstatioc_leave().
+ */
+int
+kstatioc_nfind_id(struct kstat_req *ksreq)
+{
+	struct kstat key, *found;
+	int rv;
+
+	if ((rv = kstatioc_enter(ksreq)) != 0)
+		return (rv);
+
+	/* only the field the id comparator looks at is filled in */
+	key.ks_id = ksreq->ks_id;
+	found = RBT_NFIND(kstat_id_tree, &kstat_id_tree, &key);
+
+	return (kstatioc_leave(ksreq, found));
+}
+
+/*
+ * KSTATIOC_FIND_PROVIDER: exact lookup in the provider tree using the
+ * provider, instance, name and unit fields of the request as the key.
+ * Returns 0 or an errno value from kstatioc_enter()/kstatioc_leave().
+ */
+int
+kstatioc_find_pv(struct kstat_req *ksreq)
+{
+	struct kstat key, *found;
+	int rv;
+
+	if ((rv = kstatioc_enter(ksreq)) != 0)
+		return (rv);
+
+	/* the key borrows the strings embedded in the request */
+	key.ks_provider = ksreq->ks_provider;
+	key.ks_name = ksreq->ks_name;
+	key.ks_instance = ksreq->ks_instance;
+	key.ks_unit = ksreq->ks_unit;
+
+	found = RBT_FIND(kstat_pv_tree, &kstat_pv_tree, &key);
+
+	return (kstatioc_leave(ksreq, found));
+}
+
+/*
+ * KSTATIOC_NFIND_PROVIDER: RBT_NFIND lookup in the provider tree using the
+ * provider, instance, name and unit fields of the request as the key.
+ * Returns 0 or an errno value from kstatioc_enter()/kstatioc_leave().
+ */
+int
+kstatioc_nfind_pv(struct kstat_req *ksreq)
+{
+	struct kstat key, *found;
+	int rv;
+
+	if ((rv = kstatioc_enter(ksreq)) != 0)
+		return (rv);
+
+	/* the key borrows the strings embedded in the request */
+	key.ks_provider = ksreq->ks_provider;
+	key.ks_name = ksreq->ks_name;
+	key.ks_instance = ksreq->ks_instance;
+	key.ks_unit = ksreq->ks_unit;
+
+	found = RBT_NFIND(kstat_pv_tree, &kstat_pv_tree, &key);
+
+	return (kstatioc_leave(ksreq, found));
+}
+
+/*
+ * KSTATIOC_FIND_NAME: exact lookup in the name tree using the name, unit,
+ * provider and instance fields of the request as the key.
+ * Returns 0 or an errno value from kstatioc_enter()/kstatioc_leave().
+ */
+int
+kstatioc_find_nm(struct kstat_req *ksreq)
+{
+	struct kstat key, *found;
+	int rv;
+
+	if ((rv = kstatioc_enter(ksreq)) != 0)
+		return (rv);
+
+	/* the key borrows the strings embedded in the request */
+	key.ks_name = ksreq->ks_name;
+	key.ks_provider = ksreq->ks_provider;
+	key.ks_unit = ksreq->ks_unit;
+	key.ks_instance = ksreq->ks_instance;
+
+	found = RBT_FIND(kstat_nm_tree, &kstat_nm_tree, &key);
+
+	return (kstatioc_leave(ksreq, found));
+}
+
+/*
+ * KSTATIOC_NFIND_NAME: RBT_NFIND lookup in the name tree using the name,
+ * unit, provider and instance fields of the request as the key.
+ * Returns 0 or an errno value from kstatioc_enter()/kstatioc_leave().
+ */
+int
+kstatioc_nfind_nm(struct kstat_req *ksreq)
+{
+	struct kstat key, *found;
+	int rv;
+
+	if ((rv = kstatioc_enter(ksreq)) != 0)
+		return (rv);
+
+	/* the key borrows the strings embedded in the request */
+	key.ks_name = ksreq->ks_name;
+	key.ks_provider = ksreq->ks_provider;
+	key.ks_unit = ksreq->ks_unit;
+	key.ks_instance = ksreq->ks_instance;
+
+	found = RBT_NFIND(kstat_nm_tree, &kstat_nm_tree, &key);
+
+	return (kstatioc_leave(ksreq, found));
+}
+
+/*
+ * Report whether the len byte array at str contains a NUL terminator.
+ * Returns 1 if it does, 0 otherwise.
+ */
+static int
+kstatioc_str_terminated(const char *str, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		if (str[i] == '\0')
+			return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * ioctl entry point for the kstat device.
+ *
+ * KSTATIOC_VERSION stores kstat_version in data; the FIND/NFIND commands
+ * treat data as a struct kstat_req and dispatch to the matching
+ * kstatioc_* handler.  Unknown commands fail with ENOTTY.
+ *
+ * The provider and name lookups compare ks_provider and ks_name with
+ * strcmp(), so requests where userland left either array without a NUL
+ * are rejected with EINVAL rather than letting the comparison run past
+ * the end of the array.
+ *
+ * The handlers run with the kernel lock released; it is taken again
+ * before returning.
+ */
+int
+kstatioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
+{
+	struct kstat_req *ksreq = (struct kstat_req *)data;
+	int error = 0;
+
+	switch (cmd) {
+	case KSTATIOC_FIND_PROVIDER:
+	case KSTATIOC_NFIND_PROVIDER:
+	case KSTATIOC_FIND_NAME:
+	case KSTATIOC_NFIND_NAME:
+		/* these commands use the request strings as lookup keys */
+		if (!kstatioc_str_terminated(ksreq->ks_provider,
+		    sizeof(ksreq->ks_provider)) ||
+		    !kstatioc_str_terminated(ksreq->ks_name,
+		    sizeof(ksreq->ks_name)))
+			return (EINVAL);
+		break;
+	default:
+		break;
+	}
+
+	KERNEL_UNLOCK();
+
+	switch (cmd) {
+	case KSTATIOC_VERSION:
+		*(unsigned int *)data = kstat_version;
+		break;
+
+	case KSTATIOC_FIND_ID:
+		error = kstatioc_find_id(ksreq);
+		break;
+	case KSTATIOC_NFIND_ID:
+		error = kstatioc_nfind_id(ksreq);
+		break;
+	case KSTATIOC_FIND_PROVIDER:
+		error = kstatioc_find_pv(ksreq);
+		break;
+	case KSTATIOC_NFIND_PROVIDER:
+		error = kstatioc_nfind_pv(ksreq);
+		break;
+	case KSTATIOC_FIND_NAME:
+		error = kstatioc_find_nm(ksreq);
+		break;
+	case KSTATIOC_NFIND_NAME:
+		error = kstatioc_nfind_nm(ksreq);
+		break;
+
+	default:
+		error = ENOTTY;
+		break;
+	}
+
+	KERNEL_LOCK();
+
+	return (error);
+}
+
+/*
+ * Set up kstat_pool the first time it is called; later calls do nothing.
+ * A function-local flag records whether the pool has been initialised.
+ */
+void
+kstat_init(void)
+{
+	static int initialized = 0;
+
+	if (!initialized) {
+		pool_init(&kstat_pool, sizeof(struct kstat), 0, IPL_NONE,
+		    PR_WAITOK | PR_RWLOCK, "kstatmem", NULL);
+
+		initialized = 1;
+	}
+}
+
+/*
+ * Validate a provider or name string.  The string must be between 1 and
+ * KSTAT_STRLEN - 1 characters long and consist only of ASCII letters,
+ * digits, '-', '_' and '.'.
+ *
+ * Returns 0 if the string is acceptable, -1 otherwise.
+ */
+int
+kstat_strcheck(const char *str)
+{
+	const char *p;
+	size_t len;
+
+	len = strlen(str);
+	if (len == 0 || len >= KSTAT_STRLEN)
+		return (-1);
+
+	for (p = str; *p != '\0'; p++) {
+		int c = *p;
+		int ok;
+
+		ok = (c >= 'a' && c <= 'z') ||
+		    (c >= 'A' && c <= 'Z') ||
+		    (c >= '0' && c <= '9') ||
+		    c == '-' || c == '_' || c == '.';
+		if (!ok)
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Allocate a kstat and enter it into the id, provider and name trees.
+ *
+ * provider and name must pass kstat_strcheck() or the kernel panics.
+ * The pointers are stored as is, not copied.  The new kstat starts in
+ * KSTAT_S_CREATED with the default lock, ks_read and ks_copy handlers.
+ *
+ * Returns the new kstat, or NULL if a kstat with the same provider,
+ * instance, name and unit already exists.
+ */
+struct kstat *
+kstat_create(const char *provider, unsigned int instance,
+    const char *name, unsigned int unit,
+    unsigned int type, unsigned int flags)
+{
+	struct kstat *ks;
+
+	if (kstat_strcheck(provider) == -1)
+		panic("invalid provider string");
+	if (kstat_strcheck(name) == -1)
+		panic("invalid name string");
+
+	kstat_init();
+
+	ks = pool_get(&kstat_pool, PR_WAITOK|PR_ZERO);
+
+	/* identity */
+	ks->ks_provider = provider;
+	ks->ks_instance = instance;
+	ks->ks_name = name;
+	ks->ks_unit = unit;
+	ks->ks_flags = flags;
+	ks->ks_type = type;
+	ks->ks_state = KSTAT_S_CREATED;
+
+	getnanouptime(&ks->ks_created);
+	ks->ks_updated = ks->ks_created;
+
+	/* defaults the caller may replace before kstat_install() */
+	ks->ks_lock = &kstat_default_lock;
+	ks->ks_lock_ops = &kstat_wlock_ops;
+	ks->ks_read = kstat_read;
+	ks->ks_copy = kstat_copy;
+
+	rw_enter_write(&kstat_lock);
+	ks->ks_id = kstat_next_id;
+
+	if (RBT_INSERT(kstat_pv_tree, &kstat_pv_tree, ks) != NULL) {
+		/* an identical kstat exists; the id was not consumed */
+		rw_exit_write(&kstat_lock);
+		pool_put(&kstat_pool, ks);
+		return (NULL);
+	}
+
+	/* commit */
+	kstat_next_id++;
+	kstat_version++;
+
+	if (RBT_INSERT(kstat_nm_tree, &kstat_nm_tree, ks) != NULL)
+		panic("kstat name collision! (%llu)", ks->ks_id);
+
+	if (RBT_INSERT(kstat_id_tree, &kstat_id_tree, ks) != NULL)
+		panic("kstat id collision! (%llu)", ks->ks_id);
+
+	rw_exit_write(&kstat_lock);
+
+	return (ks);
+}
+
+/*
+ * Make the kstat use rwl, taken with the read lock ops, as its lock.
+ * Only valid while the kstat is still in KSTAT_S_CREATED.
+ */
+void
+kstat_set_rlock(struct kstat *ks, struct rwlock *rwl)
+{
+	KASSERT(ks->ks_state == KSTAT_S_CREATED);
+
+	ks->ks_lock_ops = &kstat_rlock_ops;
+	ks->ks_lock = rwl;
+}
+
+/*
+ * Make the kstat use rwl, taken with the write lock ops, as its lock.
+ * Only valid while the kstat is still in KSTAT_S_CREATED.
+ */
+void
+kstat_set_wlock(struct kstat *ks, struct rwlock *rwl)
+{
+	KASSERT(ks->ks_state == KSTAT_S_CREATED);
+
+	ks->ks_lock_ops = &kstat_wlock_ops;
+	ks->ks_lock = rwl;
+}
+
+/*
+ * Make the kstat use the mutex mtx as its lock.
+ * Only valid while the kstat is still in KSTAT_S_CREATED.
+ */
+void
+kstat_set_mutex(struct kstat *ks, struct mutex *mtx)
+{
+	KASSERT(ks->ks_state == KSTAT_S_CREATED);
+
+	ks->ks_lock_ops = &kstat_mutex_ops;
+	ks->ks_lock = mtx;
+}
+
+/*
+ * "enter" op for cpu bound kstats: p is the struct cpu_info pointer stored
+ * in ks_lock, and the current process is pegged to it.
+ */
+void
+kstat_cpu_enter(void *p)
+{
+	sched_peg_curproc((struct cpu_info *)p);
+}
+
+/*
+ * "leave" op for cpu bound kstats: clears P_CPUPEG on the current
+ * process.  The argument is not used.
+ */
+void
+kstat_cpu_leave(void *p)
+{
+	struct proc *self = curproc;
+
+	atomic_clearbits_int(&self->p_flag, P_CPUPEG);
+}
+
+/*
+ * Make the kstat use the cpu ops with ci as its "lock".
+ * Only valid while the kstat is still in KSTAT_S_CREATED.
+ */
+void
+kstat_set_cpu(struct kstat *ks, struct cpu_info *ci)
+{
+	KASSERT(ks->ks_state == KSTAT_S_CREATED);
+
+	ks->ks_lock_ops = &kstat_cpu_ops;
+	ks->ks_lock = ci;
+}
+
+/*
+ * A ks_read handler that does nothing and reports success; unlike
+ * kstat_read() it leaves ks_updated untouched.
+ */
+int
+kstat_read_nop(struct kstat *ks)
+{
+	int error = 0;
+
+	return (error);
+}
+
+/*
+ * Move a kstat to KSTAT_S_INSTALLED.
+ *
+ * Unless KSTAT_F_REALLOC is set, the kstat is asserted to have a ks_copy
+ * handler or a ks_data pointer, and a non-zero ks_datalen.
+ */
+void
+kstat_install(struct kstat *ks)
+{
+	int fixed_size = !ISSET(ks->ks_flags, KSTAT_F_REALLOC);
+
+	if (fixed_size) {
+		KASSERTMSG(ks->ks_copy != NULL || ks->ks_data != NULL,
+		    "kstat %p %s:%u:%s:%u must provide ks_copy or ks_data", ks,
+		    ks->ks_provider, ks->ks_instance, ks->ks_name, ks->ks_unit);
+		KASSERT(ks->ks_datalen > 0);
+	}
+
+	/* the state change is made with kstat_lock write held */
+	rw_enter_write(&kstat_lock);
+	ks->ks_state = KSTAT_S_INSTALLED;
+	rw_exit_write(&kstat_lock);
+}
+
+/*
+ * Remove a kstat from all three trees, bump kstat_version, and return
+ * the structure to kstat_pool.
+ */
+void
+kstat_destroy(struct kstat *ks)
+{
+	rw_enter_write(&kstat_lock);
+
+	RBT_REMOVE(kstat_nm_tree, &kstat_nm_tree, ks);
+	RBT_REMOVE(kstat_pv_tree, &kstat_pv_tree, ks);
+	RBT_REMOVE(kstat_id_tree, &kstat_id_tree, ks);
+	kstat_version++;
+
+	rw_exit_write(&kstat_lock);
+
+	pool_put(&kstat_pool, ks);
+}
+
+/*
+ * Default ks_read handler: stamps ks_updated with the current uptime.
+ * Always returns 0.
+ */
+int
+kstat_read(struct kstat *ks)
+{
+	struct timespec *stamp = &ks->ks_updated;
+
+	getnanouptime(stamp);
+
+	return (0);
+}
+
+/*
+ * Default ks_copy handler: copies ks_datalen bytes from ks_data into buf.
+ * Always returns 0.
+ */
+int
+kstat_copy(struct kstat *ks, void *buf)
+{
+	const void *src = ks->ks_data;
+	size_t len = ks->ks_datalen;
+
+	memcpy(buf, src, len);
+
+	return (0);
+}
+
+RBT_GENERATE(kstat_id_tree, kstat, ks_id_entry, kstat_id_cmp); /* id tree: linked via ks_id_entry */
+RBT_GENERATE(kstat_pv_tree, kstat, ks_pv_entry, kstat_pv_cmp); /* provider tree: linked via ks_pv_entry */
+RBT_GENERATE(kstat_nm_tree, kstat, ks_nm_entry, kstat_nm_cmp); /* name tree: linked via ks_nm_entry */
+
+/*
+ * Initialise a key/value entry: zero it, copy name into kv_key (silently
+ * truncated by strlcpy() if it does not fit), set its type, and mark it
+ * as having no unit.
+ */
+void
+kstat_kv_init(struct kstat_kv *kv, const char *name, enum kstat_kv_type type)
+{
+	memset(kv, 0, sizeof(*kv));
+
+	kv->kv_unit = KSTAT_KV_U_NONE;
+	kv->kv_type = type;
+	strlcpy(kv->kv_key, name, sizeof(kv->kv_key)); /* XXX truncated? */
+}
+
+/*
+ * Initialise a key/value entry that carries a unit.  Only the counter
+ * and integer types may have a unit; any other type panics.
+ */
+void
+kstat_kv_unit_init(struct kstat_kv *kv, const char *name,
+    enum kstat_kv_type type, enum kstat_kv_unit unit)
+{
+	switch (type) {
+	case KSTAT_KV_T_COUNTER64:
+	case KSTAT_KV_T_COUNTER32:
+	case KSTAT_KV_T_UINT64:
+	case KSTAT_KV_T_INT64:
+	case KSTAT_KV_T_UINT32:
+	case KSTAT_KV_T_INT32:
+		break;
+	default:
+		panic("kv unit init %s: unit for non-integer type", name);
+	}
+
+	/* same setup as a unit-less entry, then record the unit */
+	kstat_kv_init(kv, name, type);
+	kv->kv_unit = unit;
+}
diff --git a/sys/sys/kstat.h b/sys/sys/kstat.h
new file mode 100644
index 00000000000..456481057c2
--- /dev/null
+++ b/sys/sys/kstat.h
@@ -0,0 +1,193 @@
+/* $OpenBSD: kstat.h,v 1.1 2020/07/06 03:56:51 dlg Exp $ */
+
+/*
+ * Copyright (c) 2020 David Gwynne <dlg@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _SYS_KSTAT_H_
+#define _SYS_KSTAT_H_
+
+#include <sys/ioccom.h>
+
+#define KSTAT_STRLEN 32 /* size of the ks_provider and ks_name arrays in struct kstat_req */
+
+#define KSTAT_T_RAW 0 /* presumably a value for ks_type -- confirm against users */
+#define KSTAT_T_KV 1 /* presumably a value for ks_type -- confirm against users */
+#define KSTAT_T_COUNTERS 2 /* presumably a value for ks_type -- confirm against users */
+
+struct kstat_req { /* argument of the KSTATIOC_*FIND* ioctls */
+ unsigned int ks_rflags; /* request flags */
+#define KSTATIOC_F_IGNVER (1 << 0) /* skip the ks_version check in the kernel */
+ /* the current version of the kstat subsystem */
+ unsigned int ks_version;
+
+ uint64_t ks_id; /* lookup key for the *_ID ioctls; id of the found kstat on return */
+
+ char ks_provider[KSTAT_STRLEN]; /* lookup key for the *_PROVIDER and *_NAME ioctls */
+ unsigned int ks_instance; /* lookup key for the *_PROVIDER and *_NAME ioctls */
+ char ks_name[KSTAT_STRLEN]; /* lookup key for the *_PROVIDER and *_NAME ioctls */
+ unsigned int ks_unit; /* lookup key for the *_PROVIDER and *_NAME ioctls */
+
+ struct timespec ks_created; /* out: copied from the kstat */
+ struct timespec ks_updated; /* out: ks_created while the kstat is not yet installed */
+ struct timespec ks_interval; /* out: zero while the kstat is not yet installed */
+ unsigned int ks_type; /* out: copied from the kstat */
+ unsigned int ks_state; /* out: copied from the kstat */
+
+ void *ks_data; /* in: userland buffer for the data, or NULL to skip the copy */
+ size_t ks_datalen; /* in: size of ks_data; out: length of the kstat's data */
+ unsigned int ks_dataver; /* out: zero while the kstat is not yet installed */
+};
+
+/* ioctls */
+
+#define KSTATIOC_VERSION _IOR('k', 1, unsigned int) /* fetch the current kstat version */
+#define KSTATIOC_FIND_ID _IOWR('k', 2, struct kstat_req) /* RBT_FIND in the id tree */
+#define KSTATIOC_NFIND_ID _IOWR('k', 3, struct kstat_req) /* RBT_NFIND in the id tree */
+#define KSTATIOC_FIND_PROVIDER _IOWR('k', 4, struct kstat_req) /* RBT_FIND in the provider tree */
+#define KSTATIOC_NFIND_PROVIDER _IOWR('k', 5, struct kstat_req) /* RBT_NFIND in the provider tree */
+#define KSTATIOC_FIND_NAME _IOWR('k', 6, struct kstat_req) /* RBT_FIND in the name tree */
+#define KSTATIOC_NFIND_NAME _IOWR('k', 7, struct kstat_req) /* RBT_NFIND in the name tree */
+
+/* named data */
+
+#define KSTAT_KV_NAMELEN 16 /* size of kv_key in struct kstat_kv */
+#define KSTAT_KV_ALIGN sizeof(uint64_t) /* alignment applied to struct kstat_kv */
+
+enum kstat_kv_type { /* stored in kstat_kv.kv_type */
+ KSTAT_KV_T_NULL,
+ KSTAT_KV_T_BOOL,
+ KSTAT_KV_T_COUNTER64, /* accepted by kstat_kv_unit_init() */
+ KSTAT_KV_T_COUNTER32, /* accepted by kstat_kv_unit_init() */
+ KSTAT_KV_T_UINT64, /* accepted by kstat_kv_unit_init() */
+ KSTAT_KV_T_INT64, /* accepted by kstat_kv_unit_init() */
+ KSTAT_KV_T_UINT32, /* accepted by kstat_kv_unit_init() */
+ KSTAT_KV_T_INT32, /* accepted by kstat_kv_unit_init() */
+ KSTAT_KV_T_ISTR, /* inline string */
+ KSTAT_KV_T_STR, /* trailing string */
+ KSTAT_KV_T_BYTES, /* trailing bytes */
+ KSTAT_KV_T_TEMP, /* temperature (uK) */
+};
+
+/* units only apply to integer types; kstat_kv_unit_init() panics for other types */
+enum kstat_kv_unit { /* stored in kstat_kv.kv_unit */
+ KSTAT_KV_U_NONE = 0, /* set by kstat_kv_init() */
+ KSTAT_KV_U_PACKETS, /* packets */
+ KSTAT_KV_U_BYTES, /* bytes */
+ KSTAT_KV_U_CYCLES, /* cycles */
+};
+
+struct kstat_kv { /* one key/value entry of named kstat data */
+ char kv_key[KSTAT_KV_NAMELEN]; /* key; filled by strlcpy() in kstat_kv_init() */
+ union { /* value storage; accessed through the kstat_kv_*() macros */
+ char v_istr[16];
+ unsigned int v_bool;
+ uint64_t v_u64;
+ int64_t v_s64;
+ uint32_t v_u32;
+ int32_t v_s32;
+ size_t v_len;
+ } kv_v;
+ enum kstat_kv_type kv_type; /* type of the value */
+ enum kstat_kv_unit kv_unit; /* unit of the value, KSTAT_KV_U_NONE if none */
+} __aligned(KSTAT_KV_ALIGN);
+
+#define kstat_kv_istr(_kv) (_kv)->kv_v.v_istr /* _kv is a pointer to struct kstat_kv */
+#define kstat_kv_bool(_kv) (_kv)->kv_v.v_bool
+#define kstat_kv_u64(_kv) (_kv)->kv_v.v_u64
+#define kstat_kv_s64(_kv) (_kv)->kv_v.v_s64
+#define kstat_kv_u32(_kv) (_kv)->kv_v.v_u32
+#define kstat_kv_s32(_kv) (_kv)->kv_v.v_s32
+#define kstat_kv_len(_kv) (_kv)->kv_v.v_len
+#define kstat_kv_temp(_kv) (_kv)->kv_v.v_u64 /* temperatures share the v_u64 member */
+
+#ifdef _KERNEL
+
+#include <sys/tree.h>
+
+struct kstat_lock_ops;
+
+struct kstat { /* kernel side representation of one kstat */
+ uint64_t ks_id; /* unique id assigned by kstat_create() */
+
+ const char *ks_provider; /* pointer passed to kstat_create(); not copied */
+ unsigned int ks_instance;
+ const char *ks_name; /* pointer passed to kstat_create(); not copied */
+ unsigned int ks_unit;
+
+ unsigned int ks_type;
+ unsigned int ks_flags;
+#define KSTAT_F_REALLOC (1 << 0) /* skips the ks_datalen checks in kstat_install() */
+ unsigned int ks_state;
+#define KSTAT_S_CREATED 0 /* set by kstat_create() */
+#define KSTAT_S_INSTALLED 1 /* set by kstat_install() */
+
+ struct timespec ks_created; /* uptime when kstat_create() ran */
+ RBT_ENTRY(kstat) ks_id_entry; /* linkage in kstat_id_tree */
+ RBT_ENTRY(kstat) ks_pv_entry; /* linkage in kstat_pv_tree */
+ RBT_ENTRY(kstat) ks_nm_entry; /* linkage in kstat_nm_tree */
+
+ /* the driver can update these between kstat creation and install */
+ unsigned int ks_dataver;
+ void *ks_softc;
+ void *ks_ptr;
+ int (*ks_read)(struct kstat *); /* called under the kstat's lock before ks_copy */
+ int (*ks_copy)(struct kstat *, void *); /* copies the data into the supplied buffer */
+
+ const struct kstat_lock_ops *
+ ks_lock_ops; /* how to take and release ks_lock */
+ void *ks_lock; /* argument passed to the ks_lock_ops callbacks */
+
+ /* the data that is updated by ks_read */
+ void *ks_data;
+ size_t ks_datalen;
+ struct timespec ks_updated;
+ struct timespec ks_interval;
+};
+
+struct kstat *kstat_create(const char *, unsigned int, /* provider, instance */
+ const char *, unsigned int, /* name, unit */
+ unsigned int, unsigned int); /* type, flags; returns NULL if the kstat already exists */
+
+void kstat_set_rlock(struct kstat *, struct rwlock *); /* lock with rw_enter_read */
+void kstat_set_wlock(struct kstat *, struct rwlock *); /* lock with rw_enter_write */
+void kstat_set_mutex(struct kstat *, struct mutex *); /* lock with mtx_enter */
+void kstat_set_cpu(struct kstat *, struct cpu_info *); /* peg the reader to the cpu */
+
+int kstat_read_nop(struct kstat *); /* ks_read handler that does nothing */
+
+void kstat_install(struct kstat *); /* move the kstat to KSTAT_S_INSTALLED */
+void kstat_destroy(struct kstat *); /* remove the kstat from the trees and free it */
+
+/*
+ * kstat_kv api
+ *
+ * The KSTAT_KV_*INITIALIZER macros are static initialisers for
+ * struct kstat_kv; the kstat_kv_*init() functions fill in an
+ * existing struct kstat_kv at runtime.
+ */
+
+#define KSTAT_KV_UNIT_INITIALIZER(_key, _type, _unit) { \
+ .kv_key = (_key), \
+ .kv_type = (_type), \
+ .kv_unit = (_unit), \
+}
+
+#define KSTAT_KV_INITIALIZER(_key, _type) \
+ KSTAT_KV_UNIT_INITIALIZER((_key), (_type), KSTAT_KV_U_NONE)
+
+void kstat_kv_init(struct kstat_kv *, const char *, enum kstat_kv_type); /* unit is set to KSTAT_KV_U_NONE */
+void kstat_kv_unit_init(struct kstat_kv *, const char *,
+ enum kstat_kv_type, enum kstat_kv_unit); /* panics unless the type is a counter or integer */
+
+#endif /* _KERNEL */
+
+#endif /* _SYS_KSTAT_H_ */