summaryrefslogtreecommitdiff
path: root/sys/dev
diff options
context:
space:
mode:
authorStefan Fritsch <sf@cvs.openbsd.org>2013-01-12 13:02:23 +0000
committerStefan Fritsch <sf@cvs.openbsd.org>2013-01-12 13:02:23 +0000
commitfbd4435730f8e7bc8c97f5f39a95b7a8f5793c23 (patch)
tree49d4e4b5c508a6eff4797cc135a8c5b8a03696b3 /sys/dev
parent81cf832299e5c2e31c6bb6934d93741df73a9590 (diff)
add viomb driver for virtio balloon device
This allows the host to request some memory back from the openbsd guest in case of a shortage. Patch by Dinar Talypov OK jasper@ OK jmc@ (man page)
Diffstat (limited to 'sys/dev')
-rw-r--r--sys/dev/pci/files.pci6
-rw-r--r--sys/dev/pci/viomb.c462
2 files changed, 467 insertions, 1 deletions
diff --git a/sys/dev/pci/files.pci b/sys/dev/pci/files.pci
index 4f8ad7545f9..b4af38167a9 100644
--- a/sys/dev/pci/files.pci
+++ b/sys/dev/pci/files.pci
@@ -1,4 +1,4 @@
-# $OpenBSD: files.pci,v 1.290 2012/11/29 23:36:34 stsp Exp $
+# $OpenBSD: files.pci,v 1.291 2013/01/12 13:02:22 sf Exp $
# $NetBSD: files.pci,v 1.20 1996/09/24 17:47:15 christos Exp $
#
# Config file and device description for machine-independent PCI code.
@@ -841,3 +841,7 @@ file dev/pci/if_vio.c vio
device vioblk: scsi
attach vioblk at virtio
file dev/pci/vioblk.c vioblk
+
+device viomb
+attach viomb at virtio
+file dev/pci/viomb.c viomb
diff --git a/sys/dev/pci/viomb.c b/sys/dev/pci/viomb.c
new file mode 100644
index 00000000000..4ad2b1f0322
--- /dev/null
+++ b/sys/dev/pci/viomb.c
@@ -0,0 +1,462 @@
+/* $OpenBSD: viomb.c,v 1.1 2013/01/12 13:02:22 sf Exp $ */
+/* $NetBSD: viomb.c,v 1.1 2011/10/30 12:12:21 hannken Exp $ */
+/*
+ * Copyright (c) 2012 Talypov Dinar <dinar@i-nk.ru>
+ * Copyright (c) 2010 Minoura Makoto.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/device.h>
+#include <sys/workq.h>
+#include <sys/stdint.h>
+#include <uvm/uvm.h>
+#include <dev/pci/pcidevs.h>
+#include <dev/pci/pcivar.h>
+
+#include <dev/pci/virtioreg.h>
+#include <dev/pci/virtiovar.h>
+
+#if VIRTIO_PAGE_SIZE!=PAGE_SIZE
+#error non-4K page sizes are not supported yet
+#endif
+
+#define DEVNAME(sc) sc->sc_dev.dv_xname
+#if VIRTIO_DEBUG
+#define VIOMBDEBUG(sc, format, args...) \
+ do { printf("%s: " format, sc->sc_dev.dv_xname, ##args);} \
+ while (0)
+#else
+#define VIOMBDEBUG(...)
+#endif
+
+/* Direction flag passed as the last argument of virtio_enqueue_p().
+ * These should eventually be moved to virtiovar.h.
+ */
+#define VRING_READ 0
+#define VRING_WRITE 1
+
+/* Last argument of virtio_enqueue_commit(): notify the host or not. */
+#define VRING_NO_NOTIFY 0
+#define VRING_NOTIFY 1
+
+/* Offsets of the device configuration registers */
+#define VIRTIO_BALLOON_CONFIG_NUM_PAGES 0 /* 32bit, read into sc_npages */
+#define VIRTIO_BALLOON_CONFIG_ACTUAL 4 /* 32bit, read into sc_actual and written back by the vq handlers */
+
+/* Feature bits */
+#define VIRTIO_BALLOON_F_MUST_TELL_HOST (1<<0)
+#define VIRTIO_BALLOON_F_STATS_VQ (1<<1)
+
+static const struct virtio_feature_name viomb_feature_names[] = { /* passed to virtio_negotiate_features() */
+ {VIRTIO_BALLOON_F_MUST_TELL_HOST, "TellHost"},
+ {VIRTIO_BALLOON_F_STATS_VQ, "StatVQ"},
+ {0, NULL} /* terminator */
+};
+#define PGS_PER_REQ 256 /* entries in bl_pages[]: 1MB, 4KB/page */
+#define VQ_INFLATE 0 /* index of the inflate queue in sc_vq[] */
+#define VQ_DEFLATE 1 /* index of the deflate queue in sc_vq[] */
+
+struct balloon_req { /* page-list request handed to the inflate or deflate queue */
+ bus_dmamap_t bl_dmamap; /* created and loaded over bl_pages[] in viomb_attach() */
+ struct pglist bl_pglist; /* vm_pages that belong to the current request */
+ int bl_nentries; /* entry count, set by viomb_inflate()/viomb_deflate() and read by the vq handlers */
+ u_int32_t bl_pages[PGS_PER_REQ]; /* page numbers: phys_addr / VIRTIO_PAGE_SIZE */
+};
+
+struct viomb_softc { /* per-device driver state */
+ struct device sc_dev; /* must stay first: viomb_attach() casts struct device * to the softc */
+ struct virtio_softc *sc_virtio; /* parent virtio softc */
+ struct virtqueue sc_vq[2]; /* indexed by VQ_INFLATE / VQ_DEFLATE */
+ u_int32_t sc_npages; /* NUM_PAGES config register as last read */
+ u_int32_t sc_actual; /* ACTUAL config register as last read */
+ struct balloon_req sc_req; /* the one request buffer shared by inflate and deflate */
+ struct workq_task sc_task; /* task storage passed to workq_queue_task() */
+ int sc_workq_queued; /* nonzero while viomb_worker() is queued */
+ struct pglist sc_balloon_pages; /* pages moved here by viomb_inflate_intr(), taken back by viomb_deflate() */
+};
+
+int viomb_match(struct device *, void *, void *); /* referenced from viomb_ca */
+void viomb_attach(struct device *, struct device *, void *); /* referenced from viomb_ca */
+void viomb_worker(void *, void *); /* work queue task body */
+void viomb_inflate(struct viomb_softc *); /* enqueue one inflate request */
+void viomb_deflate(struct viomb_softc *); /* enqueue one deflate request */
+int viomb_config_change(struct virtio_softc *); /* installed as sc_config_change */
+void viomb_read_config(struct viomb_softc *); /* refresh sc_npages / sc_actual */
+int viomb_vq_dequeue(struct virtqueue *); /* shared dequeue helper for both handlers */
+int viomb_inflate_intr(struct virtqueue *); /* vq_done of the inflate queue */
+int viomb_deflate_intr(struct virtqueue *); /* vq_done of the deflate queue */
+
+struct cfattach viomb_ca = { /* softc size plus the match and attach routines */
+ sizeof(struct viomb_softc), viomb_match, viomb_attach
+};
+
+struct cfdriver viomb_cd = { /* driver name "viomb", class DV_DULL */
+ NULL, "viomb", DV_DULL
+};
+
+struct workq *viomb_workq; /* created in viomb_attach(); one global, reassigned by every attach */
+
+/*
+ * Match routine: aux is the parent virtio softc.  Returns 1 when its
+ * child device id is the virtio balloon id, 0 otherwise.
+ */
+int
+viomb_match(struct device *parent, void *match, void *aux)
+{
+	struct virtio_softc *vsc = aux;
+
+	return (vsc->sc_childdevid == PCI_PRODUCT_VIRTIO_BALLOON ? 1 : 0);
+}
+
+/*
+ * Attach the balloon device: register with the parent virtio softc,
+ * negotiate features, allocate the inflate and deflate virtqueues, create
+ * and load the DMA map covering sc_req.bl_pages, and create the work queue
+ * that runs viomb_worker().
+ *
+ * On failure every resource acquired so far is released in reverse order
+ * and the parent's child pointer is set to VIRTIO_CHILD_ERROR.
+ */
+void
+viomb_attach(struct device *parent, struct device *self, void *aux)
+{
+	struct viomb_softc *sc = (struct viomb_softc *)self;
+	struct virtio_softc *vsc = (struct virtio_softc *)parent;
+	u_int32_t features;
+	int i;
+
+	if (vsc->sc_child != NULL) {
+		printf("child already attached for %s; something wrong...\n",
+		    parent->dv_xname);
+		return;
+	}
+
+	/* fail on non-4K page size archs */
+	if (VIRTIO_PAGE_SIZE != PAGE_SIZE){
+		printf("non-4K page size arch found, needs %d, got %d\n",
+		    VIRTIO_PAGE_SIZE, PAGE_SIZE);
+		return;
+	}
+
+	sc->sc_virtio = vsc;
+	vsc->sc_vqs = &sc->sc_vq[VQ_INFLATE];
+	vsc->sc_nvqs = 0;
+	vsc->sc_child = self;
+	vsc->sc_ipl = IPL_VM;
+	vsc->sc_config_change = viomb_config_change;
+	vsc->sc_intrhand = virtio_vq_intr;
+
+	/* negotiate features */
+	features = VIRTIO_F_RING_INDIRECT_DESC;
+	features = virtio_negotiate_features(vsc, features,
+	    viomb_feature_names);
+
+	/* sc_nvqs counts the queues allocated so far, for the error path */
+	if ((virtio_alloc_vq(vsc, &sc->sc_vq[VQ_INFLATE], VQ_INFLATE,
+	    sizeof(u_int32_t) * PGS_PER_REQ, 1, "inflate") != 0))
+		goto err;
+	vsc->sc_nvqs++;
+	if ((virtio_alloc_vq(vsc, &sc->sc_vq[VQ_DEFLATE], VQ_DEFLATE,
+	    sizeof(u_int32_t) * PGS_PER_REQ, 1, "deflate") != 0))
+		goto err;
+	vsc->sc_nvqs++;
+
+	sc->sc_vq[VQ_INFLATE].vq_done = viomb_inflate_intr;
+	sc->sc_vq[VQ_DEFLATE].vq_done = viomb_deflate_intr;
+	virtio_start_vq_intr(vsc, &sc->sc_vq[VQ_INFLATE]);
+	virtio_start_vq_intr(vsc, &sc->sc_vq[VQ_DEFLATE]);
+
+	viomb_read_config(sc);
+	TAILQ_INIT(&sc->sc_balloon_pages);
+
+	if (bus_dmamap_create(vsc->sc_dmat, sizeof(u_int32_t) * PGS_PER_REQ,
+	    1, sizeof(u_int32_t) * PGS_PER_REQ, 0,
+	    BUS_DMA_NOWAIT, &sc->sc_req.bl_dmamap)) {
+		printf("%s: dmamap creation failed.\n", DEVNAME(sc));
+		goto err;
+	}
+	/* The map covers bl_pages[] only, so offset 0 is bl_pages[0]. */
+	if (bus_dmamap_load(vsc->sc_dmat, sc->sc_req.bl_dmamap,
+	    &sc->sc_req.bl_pages[0],
+	    sizeof(uint32_t) * PGS_PER_REQ,
+	    NULL, BUS_DMA_NOWAIT)) {
+		printf("%s: dmamap load failed.\n", DEVNAME(sc));
+		goto err_dmamap;
+	}
+
+	viomb_workq = workq_create("viomb", 1, IPL_VM);
+	if (viomb_workq == NULL) {
+		printf("%s: workq creation failed.\n", DEVNAME(sc));
+		goto err_dmamap_loaded;
+	}
+	printf("\n");
+	return;
+err_dmamap_loaded:
+	/* a loaded map has to be unloaded before it is destroyed */
+	bus_dmamap_unload(vsc->sc_dmat, sc->sc_req.bl_dmamap);
+err_dmamap:
+	bus_dmamap_destroy(vsc->sc_dmat, sc->sc_req.bl_dmamap);
+err:
+	for (i = 0; i < vsc->sc_nvqs; i++)
+		virtio_free_vq(vsc, &sc->sc_vq[i]);
+	vsc->sc_nvqs = 0;
+	vsc->sc_child = VIRTIO_CHILD_ERROR;
+	return;
+}
+
+/*
+ * Configuration-change callback installed in the parent virtio softc.
+ * Queues viomb_worker() unless a run is already pending.  Always
+ * returns 1.
+ */
+int
+viomb_config_change(struct virtio_softc *vsc)
+{
+	struct viomb_softc *sc = (struct viomb_softc *)vsc->sc_child;
+
+	/* an already queued worker re-reads the config when it runs */
+	if (sc->sc_workq_queued != 0)
+		return (1);
+
+	workq_queue_task(viomb_workq, &sc->sc_task, 0,
+	    viomb_worker, sc, NULL);
+	sc->sc_workq_queued = 1;
+	return (1);
+}
+
+/*
+ * Work queue task (arg1 is the softc, arg2 is unused).  Running at
+ * splvm, it clears the queued flag, re-reads the configuration and
+ * starts one deflate or inflate request when the requested size
+ * (sc_npages) differs from the current one (sc_actual).
+ */
+void
+viomb_worker(void *arg1, void *arg2)
+{
+	struct viomb_softc *sc = arg1;
+	int ipl;
+
+	ipl = splvm();
+	sc->sc_workq_queued = 0;
+	viomb_read_config(sc);
+	if (sc->sc_npages < sc->sc_actual) {
+		viomb_deflate(sc);
+		VIOMBDEBUG(sc, "deflating balloon from %u to %u.\n",
+		    sc->sc_actual, sc->sc_npages);
+	} else if (sc->sc_npages > sc->sc_actual) {
+		VIOMBDEBUG(sc, "inflating balloon from %u to %u.\n",
+		    sc->sc_actual, sc->sc_npages);
+		viomb_inflate(sc);
+	}
+	splx(ipl);
+}
+
+/*
+ * Start one inflate request: allocate up to PGS_PER_REQ pages with
+ * uvm_pglistalloc(UVM_PLA_NOWAIT), record their page numbers in
+ * sc_req.bl_pages[] and enqueue that array on the inflate virtqueue.
+ * The pages stay on sc_req.bl_pglist until viomb_inflate_intr() moves
+ * them to sc_balloon_pages.  If the request cannot be enqueued the
+ * pages are freed again.
+ *
+ * NOTE(review): sc_req is the only request buffer; nothing here checks
+ * whether an earlier request is still outstanding -- confirm callers
+ * cannot overlap requests.
+ */
+void
+viomb_inflate(struct viomb_softc *sc)
+{
+	struct virtio_softc *vsc = (struct virtio_softc *)sc->sc_virtio;
+	struct balloon_req *b;
+	struct vm_page *p;
+	struct virtqueue *vq = &sc->sc_vq[VQ_INFLATE];
+	u_int32_t nvpages, i = 0;
+	int slot, error;
+
+	nvpages = sc->sc_npages - sc->sc_actual;
+	if (nvpages > PGS_PER_REQ)
+		nvpages = PGS_PER_REQ;
+	b = &sc->sc_req;
+
+	if ((error = uvm_pglistalloc(nvpages * PAGE_SIZE, 0,
+	    dma_constraint.ucr_high,
+	    0, 0, &b->bl_pglist, nvpages,
+	    UVM_PLA_NOWAIT))) {
+		/* nvpages is 32 bits wide: %u, not %lu */
+		printf("%s: unable to allocate %u physmem pages, "
+		    "error %d\n", DEVNAME(sc), nvpages, error);
+		return;
+	}
+
+	b->bl_nentries = nvpages;
+	TAILQ_FOREACH(p, &b->bl_pglist, pageq)
+		b->bl_pages[i++] = p->phys_addr / VIRTIO_PAGE_SIZE;
+
+	KASSERT(i == nvpages);
+
+	if ((virtio_enqueue_prep(vq, &slot)) > 0) {
+		printf("%s:virtio_enqueue_prep() vq_num %d\n",
+		    DEVNAME(sc), vq->vq_num);
+		goto err;
+	}
+	if (virtio_enqueue_reserve(vq, slot, 1)) {
+		printf("%s:virtio_enqueue_reserve vq_num %d\n",
+		    DEVNAME(sc), vq->vq_num);
+		goto err;
+	}
+	bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 0,
+	    sizeof(u_int32_t) * nvpages, BUS_DMASYNC_PREWRITE);
+	virtio_enqueue_p(vq, slot, b->bl_dmamap, 0,
+	    sizeof(u_int32_t) * nvpages, VRING_READ);
+	virtio_enqueue_commit(vsc, vq, slot, VRING_NOTIFY);
+	return;
+err:
+	uvm_pglistfree(&b->bl_pglist);
+	return;
+}
+
+/*
+ * Start one deflate request: move up to PGS_PER_REQ pages from
+ * sc_balloon_pages to sc_req.bl_pglist, record their page numbers in
+ * sc_req.bl_pages[] and enqueue that array on the deflate virtqueue.
+ *
+ * Without VIRTIO_BALLOON_F_MUST_TELL_HOST the pages are freed before the
+ * request is committed; with it they are freed by viomb_deflate_intr().
+ * If the request cannot be enqueued the pages go back to the head of
+ * sc_balloon_pages in their original order.
+ */
+void
+viomb_deflate(struct viomb_softc *sc)
+{
+	struct virtio_softc *vsc = (struct virtio_softc *)sc->sc_virtio;
+	struct balloon_req *b;
+	struct vm_page *p;
+	struct virtqueue *vq = &sc->sc_vq[VQ_DEFLATE];
+	u_int32_t nvpages, i;
+	int slot;
+
+	nvpages = sc->sc_actual - sc->sc_npages;
+	if (nvpages > PGS_PER_REQ)
+		nvpages = PGS_PER_REQ;
+	b = &sc->sc_req;
+
+	TAILQ_INIT(&b->bl_pglist);
+	for (i = 0; i < nvpages; i++) {
+		p = TAILQ_FIRST(&sc->sc_balloon_pages);
+		if (p == NULL)
+			break;
+		TAILQ_REMOVE(&sc->sc_balloon_pages, p, pageq);
+		TAILQ_INSERT_TAIL(&b->bl_pglist, p, pageq);
+		b->bl_pages[i] = p->phys_addr / VIRTIO_PAGE_SIZE;
+	}
+
+	/*
+	 * Exactly i pages were collected.  If the balloon ran out early,
+	 * shrink the request so that neither the entry count nor the
+	 * descriptor length covers stale bl_pages[] slots.
+	 */
+	nvpages = i;
+	b->bl_nentries = nvpages;
+	if (nvpages == 0)
+		return;		/* balloon is empty, nothing to hand back */
+
+	if (virtio_enqueue_prep(vq, &slot)) {
+		printf("%s:virtio_get_slot(def) vq_num %d\n",
+		    DEVNAME(sc), vq->vq_num);
+		goto err;
+	}
+	if (virtio_enqueue_reserve(vq, slot, 1)) {
+		printf("%s:virtio_enqueue_reserve() vq_num %d\n",
+		    DEVNAME(sc), vq->vq_num);
+		goto err;
+	}
+	bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 0,
+	    sizeof(u_int32_t) * nvpages,
+	    BUS_DMASYNC_PREWRITE);
+	virtio_enqueue_p(vq, slot, b->bl_dmamap, 0,
+	    sizeof(u_int32_t) * nvpages, VRING_READ);
+
+	if (!(vsc->sc_features & VIRTIO_BALLOON_F_MUST_TELL_HOST))
+		uvm_pglistfree(&b->bl_pglist);
+	virtio_enqueue_commit(vsc, vq, slot, VRING_NOTIFY);
+	return;
+err:
+	/* walk from the tail and insert at the head: keeps page order */
+	while ((p = TAILQ_LAST(&b->bl_pglist, pglist))) {
+		TAILQ_REMOVE(&b->bl_pglist, p, pageq);
+		TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p, pageq);
+	}
+	return;
+}
+
+/*
+ * Refresh sc_npages and sc_actual from the NUM_PAGES and ACTUAL device
+ * configuration registers, converting from little-endian.
+ */
+void
+viomb_read_config(struct viomb_softc *sc)
+{
+	struct virtio_softc *vsc = (struct virtio_softc *)sc->sc_virtio;
+	u_int32_t raw_npages, raw_actual;
+
+	/* these values are explicitly specified as little-endian */
+	raw_npages = virtio_read_device_config_4(vsc,
+	    VIRTIO_BALLOON_CONFIG_NUM_PAGES);
+	raw_actual = virtio_read_device_config_4(vsc,
+	    VIRTIO_BALLOON_CONFIG_ACTUAL);
+	sc->sc_npages = letoh32(raw_npages);
+	sc->sc_actual = letoh32(raw_actual);
+	VIOMBDEBUG(sc, "sc->sc_npages %u, sc->sc_actual %u\n",
+	    sc->sc_npages, sc->sc_actual);
+}
+
+/*
+ * Dequeue one completed slot from vq and commit it.  Returns 0 on
+ * success; otherwise prints a diagnostic and returns the error from
+ * virtio_dequeue().
+ */
+int
+viomb_vq_dequeue(struct virtqueue *vq)
+{
+	struct virtio_softc *vsc = vq->vq_owner;
+	struct viomb_softc *sc = (struct viomb_softc *)vsc->sc_child;
+	int error, slot;
+
+	error = virtio_dequeue(vsc, vq, &slot, NULL);
+	if (error == 0) {
+		virtio_dequeue_commit(vq, slot);
+		return(0);
+	}
+
+	printf("%s: dequeue failed, errno %d\n",
+	    DEVNAME(sc), error);
+	return(error);
+}
+
+/*
+ * interrupt handling for vq's
+ */
+
+/*
+ * Completion handler of the inflate virtqueue (installed as vq_done).
+ * Moves the pages of the finished request from sc_req.bl_pglist to
+ * sc_balloon_pages, writes the grown size to the ACTUAL config register,
+ * re-reads the configuration and queues viomb_worker() again if the
+ * requested size is still larger.  Always returns 1.
+ */
+int
+viomb_inflate_intr(struct virtqueue *vq)
+{
+	struct virtio_softc *vsc = vq->vq_owner;
+	struct viomb_softc *sc = (struct viomb_softc *)vsc->sc_child;
+	struct balloon_req *b;
+	struct vm_page *p;
+	u_int32_t nvpages;
+
+	if (viomb_vq_dequeue(vq))
+		return(1);
+
+	b = &sc->sc_req;
+	nvpages = b->bl_nentries;
+	/*
+	 * The map was loaded over bl_pages[] itself, so the synced range
+	 * starts at offset 0 of the map, as on the PREWRITE side.
+	 */
+	bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 0,
+	    sizeof(u_int32_t) * nvpages,
+	    BUS_DMASYNC_POSTWRITE);
+	while (!TAILQ_EMPTY(&b->bl_pglist)) {
+		p = TAILQ_FIRST(&b->bl_pglist);
+		TAILQ_REMOVE(&b->bl_pglist, p, pageq);
+		TAILQ_INSERT_TAIL(&sc->sc_balloon_pages, p, pageq);
+	}
+	VIOMBDEBUG(sc, "updating sc->sc_actual from %u to %u\n",
+	    sc->sc_actual, sc->sc_actual + nvpages);
+	virtio_write_device_config_4(vsc, VIRTIO_BALLOON_CONFIG_ACTUAL,
+	    sc->sc_actual + nvpages);
+	viomb_read_config(sc);
+	/* if we have more work to do, add it to the task list */
+	if (sc->sc_npages > sc->sc_actual && sc->sc_workq_queued == 0){
+		workq_queue_task(viomb_workq, &sc->sc_task, 0,
+		    viomb_worker, sc, NULL);
+		sc->sc_workq_queued = 1;
+	}
+	return (1);
+}
+
+/*
+ * Completion handler of the deflate virtqueue (installed as vq_done).
+ * With VIRTIO_BALLOON_F_MUST_TELL_HOST the pages of the finished request
+ * are freed here.  It then writes the shrunken size to the ACTUAL config
+ * register, re-reads the configuration and queues viomb_worker() again
+ * if the requested size is still smaller.  Always returns 1.
+ */
+int
+viomb_deflate_intr(struct virtqueue *vq)
+{
+	struct virtio_softc *vsc = vq->vq_owner;
+	struct viomb_softc *sc = (struct viomb_softc *)vsc->sc_child;
+	struct balloon_req *b;
+	u_int32_t nvpages;
+
+	if (viomb_vq_dequeue(vq))
+		return(1);
+
+	b = &sc->sc_req;
+	nvpages = b->bl_nentries;
+	/*
+	 * The map was loaded over bl_pages[] itself, so the synced range
+	 * starts at offset 0 of the map, as on the PREWRITE side.
+	 */
+	bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 0,
+	    sizeof(u_int32_t) * nvpages,
+	    BUS_DMASYNC_POSTWRITE);
+
+	if (vsc->sc_features & VIRTIO_BALLOON_F_MUST_TELL_HOST)
+		uvm_pglistfree(&b->bl_pglist);
+
+	VIOMBDEBUG(sc, "updating sc->sc_actual from %u to %u\n",
+	    sc->sc_actual, sc->sc_actual - nvpages);
+	virtio_write_device_config_4(vsc, VIRTIO_BALLOON_CONFIG_ACTUAL,
+	    sc->sc_actual - nvpages);
+	viomb_read_config(sc);
+
+	/* if we have more work to do, add it to tasks list */
+	if (sc->sc_npages < sc->sc_actual && sc->sc_workq_queued == 0){
+		workq_queue_task(viomb_workq, &sc->sc_task, 0,
+		    viomb_worker, sc, NULL);
+		sc->sc_workq_queued = 1;
+	}
+	return(1);
+}