author		Stefan Fritsch <sf@cvs.openbsd.org>	2012-09-19 19:24:34 +0000
committer	Stefan Fritsch <sf@cvs.openbsd.org>	2012-09-19 19:24:34 +0000
commit		14d25ef86c56f8785dce0f9911afc33ac48155a7 (patch)
tree		f39ddbee72f9fab0b16178af9c9feee8592def55 /sys/dev
parent		7a9376510fb01519452fa89de1bb636aa11894b6 (diff)
Add new drivers for virtio network (vio) and block devices (vioblk; the disks
attach as SCSI disks). These are paravirtualized devices offered by some
hypervisors such as KVM and VirtualBox. The virtio transport driver has the
PCI-specific parts separated out; this will make it easier to add support for
MMIO (e.g. for ARM) later.

OK mikeb, OK jasper, "commit what you have" deraadt
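
The core of the new transport is feature negotiation: the driver offers a
feature mask and virtio_negotiate_features() intersects it with what the host
advertises. A minimal, self-contained userland sketch of that logic (the
constants mirror if_vio.c below; the program itself is illustrative, not
driver code):

	#include <stdint.h>
	#include <stdio.h>

	#define VIRTIO_NET_F_MAC	(1 << 5)
	#define VIRTIO_NET_F_STATUS	(1 << 16)
	#define VIRTIO_NET_F_CTRL_VQ	(1 << 17)

	int
	main(void)
	{
		uint32_t offered = VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS |
		    VIRTIO_NET_F_CTRL_VQ;
		uint32_t host = VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS;
		uint32_t negotiated = offered & host;	/* the intersection */

		if (negotiated & VIRTIO_NET_F_CTRL_VQ)
			printf("control queue available\n");
		else
			printf("no control queue; fall back to promisc\n");
		return 0;
	}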
Diffstat (limited to 'sys/dev')
-rw-r--r--	sys/dev/pci/files.pci		17
-rw-r--r--	sys/dev/pci/if_vio.c		1357
-rw-r--r--	sys/dev/pci/vioblk.c		619
-rw-r--r--	sys/dev/pci/vioblkreg.h		75
-rw-r--r--	sys/dev/pci/virtio.c		918
-rw-r--r--	sys/dev/pci/virtio_pci.c	411
-rw-r--r--	sys/dev/pci/virtioreg.h		193
-rw-r--r--	sys/dev/pci/virtiovar.h		229
8 files changed, 3818 insertions, 1 deletion
diff --git a/sys/dev/pci/files.pci b/sys/dev/pci/files.pci
index bde71ee0ea5..c16d46dedfb 100644
--- a/sys/dev/pci/files.pci
+++ b/sys/dev/pci/files.pci
@@ -1,4 +1,4 @@
-# $OpenBSD: files.pci,v 1.287 2012/08/30 21:54:12 mpi Exp $
+# $OpenBSD: files.pci,v 1.288 2012/09/19 19:24:33 sf Exp $
# $NetBSD: files.pci,v 1.20 1996/09/24 17:47:15 christos Exp $
#
# Config file and device description for machine-independent PCI code.
@@ -823,3 +823,18 @@ file dev/pci/itherm.c itherm
device glxpcib: isabus, gpiobus, i2cbus
attach glxpcib at pci
file dev/pci/glxpcib.c glxpcib
+
+# VirtIO
+device virtio {}
+file dev/pci/virtio.c virtio
+
+attach virtio at pci with virtio_pci
+file dev/pci/virtio_pci.c virtio_pci
+
+device vio
+attach vio at virtio
+file dev/pci/if_vio.c vio
+
+device vioblk: scsi
+attach vioblk at virtio
+file dev/pci/vioblk.c vioblk
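
With this glue in place, a kernel configuration would enable the new devices
with lines roughly like the following (a sketch in the usual GENERIC style,
not part of this commit):

	virtio*	at pci?		# Virtio PCI device
	vio*	at virtio?	# Virtio network device
	vioblk*	at virtio?	# Virtio block device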
diff --git a/sys/dev/pci/if_vio.c b/sys/dev/pci/if_vio.c
new file mode 100644
index 00000000000..076b77dc3ae
--- /dev/null
+++ b/sys/dev/pci/if_vio.c
@@ -0,0 +1,1357 @@
+/*
+ * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
+ * Copyright (c) 2010 Minoura Makoto.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include "bpfilter.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/device.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/timeout.h>
+
+#include <dev/pci/pcidevs.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/virtioreg.h>
+#include <dev/pci/virtiovar.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#endif
+
+#include <net/bpf.h>
+
+#if VIRTIO_DEBUG
+#define DBGPRINT(fmt, args...) printf("%s: " fmt "\n", __func__, ## args)
+#else
+#define DBGPRINT(fmt, args...)
+#endif
+
+/*
+ * if_vioreg.h:
+ */
+/* Configuration registers */
+#define VIRTIO_NET_CONFIG_MAC 0 /* 8bit x 6byte */
+#define VIRTIO_NET_CONFIG_STATUS 6 /* 16bit */
+
+/* Feature bits */
+#define VIRTIO_NET_F_CSUM (1<<0)
+#define VIRTIO_NET_F_GUEST_CSUM (1<<1)
+#define VIRTIO_NET_F_MAC (1<<5)
+#define VIRTIO_NET_F_GSO (1<<6)
+#define VIRTIO_NET_F_GUEST_TSO4 (1<<7)
+#define VIRTIO_NET_F_GUEST_TSO6 (1<<8)
+#define VIRTIO_NET_F_GUEST_ECN (1<<9)
+#define VIRTIO_NET_F_GUEST_UFO (1<<10)
+#define VIRTIO_NET_F_HOST_TSO4 (1<<11)
+#define VIRTIO_NET_F_HOST_TSO6 (1<<12)
+#define VIRTIO_NET_F_HOST_ECN (1<<13)
+#define VIRTIO_NET_F_HOST_UFO (1<<14)
+#define VIRTIO_NET_F_MRG_RXBUF (1<<15)
+#define VIRTIO_NET_F_STATUS (1<<16)
+#define VIRTIO_NET_F_CTRL_VQ (1<<17)
+#define VIRTIO_NET_F_CTRL_RX (1<<18)
+#define VIRTIO_NET_F_CTRL_VLAN (1<<19)
+#define VIRTIO_NET_F_CTRL_RX_EXTRA (1<<20)
+#define VIRTIO_NET_F_GUEST_ANNOUNCE (1<<21)
+
+static const struct virtio_feature_name virtio_net_feature_names[] = {
+ { VIRTIO_NET_F_CSUM, "CSum" },
+ { VIRTIO_NET_F_MAC, "MAC" },
+ { VIRTIO_NET_F_GSO, "GSO" },
+ { VIRTIO_NET_F_GUEST_TSO4, "GuestTSO4" },
+ { VIRTIO_NET_F_GUEST_TSO6, "GuestTSO6" },
+ { VIRTIO_NET_F_GUEST_ECN, "GuestECN" },
+ { VIRTIO_NET_F_GUEST_UFO, "GuestUFO" },
+ { VIRTIO_NET_F_HOST_TSO4, "HostTSO4" },
+ { VIRTIO_NET_F_HOST_TSO6, "HostTSO6" },
+ { VIRTIO_NET_F_HOST_ECN, "HostECN" },
+ { VIRTIO_NET_F_HOST_UFO, "HostUFO" },
+ { VIRTIO_NET_F_MRG_RXBUF, "MrgRXBuf" },
+ { VIRTIO_NET_F_STATUS, "Status" },
+ { VIRTIO_NET_F_CTRL_VQ, "CtrlVQ" },
+ { VIRTIO_NET_F_CTRL_RX, "CtrlRX" },
+ { VIRTIO_NET_F_CTRL_VLAN, "CtrlVLAN" },
+ { VIRTIO_NET_F_CTRL_RX_EXTRA, "CtrlRXExtra" },
+ { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" },
+ { 0, NULL }
+};
+
+/* Status */
+#define VIRTIO_NET_S_LINK_UP 1
+
+/* Packet header structure */
+struct virtio_net_hdr {
+ uint8_t flags;
+ uint8_t gso_type;
+ uint16_t hdr_len;
+ uint16_t gso_size;
+ uint16_t csum_start;
+ uint16_t csum_offset;
+#if 0
+ uint16_t num_buffers; /* if VIRTIO_NET_F_MRG_RXBUF enabled */
+#endif
+} __packed;
+
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* flags */
+#define VIRTIO_NET_HDR_GSO_NONE 0 /* gso_type */
+#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* gso_type */
+#define VIRTIO_NET_HDR_GSO_UDP 3 /* gso_type */
+#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* gso_type */
+#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* gso_type, |'ed */
+
+#define VIRTIO_NET_MAX_GSO_LEN (65536+ETHER_HDR_LEN)
+
+/* Control virtqueue */
+struct virtio_net_ctrl_cmd {
+ uint8_t class;
+ uint8_t command;
+} __packed;
+#define VIRTIO_NET_CTRL_RX 0
+# define VIRTIO_NET_CTRL_RX_PROMISC 0
+# define VIRTIO_NET_CTRL_RX_ALLMULTI 1
+
+#define VIRTIO_NET_CTRL_MAC 1
+# define VIRTIO_NET_CTRL_MAC_TABLE_SET 0
+
+#define VIRTIO_NET_CTRL_VLAN 2
+# define VIRTIO_NET_CTRL_VLAN_ADD 0
+# define VIRTIO_NET_CTRL_VLAN_DEL 1
+
+struct virtio_net_ctrl_status {
+ uint8_t ack;
+} __packed;
+#define VIRTIO_NET_OK 0
+#define VIRTIO_NET_ERR 1
+
+struct virtio_net_ctrl_rx {
+ uint8_t onoff;
+} __packed;
+
+struct virtio_net_ctrl_mac_tbl {
+ uint32_t nentries;
+ uint8_t macs[][ETHER_ADDR_LEN];
+} __packed;
+
+struct virtio_net_ctrl_vlan {
+ uint16_t id;
+} __packed;
+
+/*
+ * if_viovar.h:
+ */
+enum vio_ctrl_state {
+ FREE, INUSE, DONE, RESET
+};
+
+struct vio_softc {
+ struct device sc_dev;
+
+ struct virtio_softc *sc_virtio;
+#define VQRX 0
+#define VQTX 1
+#define VQCTL 2
+ struct virtqueue sc_vq[3];
+
+ struct arpcom sc_ac;
+ struct ifmedia sc_media;
+
+ short sc_ifflags;
+
+ /* bus_dmamem */
+ bus_dma_segment_t sc_dma_seg;
+ bus_dmamap_t sc_dma_map;
+ size_t sc_dma_size;
+ caddr_t sc_dma_kva;
+
+ struct virtio_net_hdr *sc_rx_hdrs;
+ struct virtio_net_hdr *sc_tx_hdrs;
+ struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
+ struct virtio_net_ctrl_status *sc_ctrl_status;
+ struct virtio_net_ctrl_rx *sc_ctrl_rx;
+ struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
+#define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc
+ struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;
+
+ /* kmem */
+ bus_dmamap_t *sc_arrays;
+#define sc_rx_dmamaps sc_arrays
+ bus_dmamap_t *sc_tx_dmamaps;
+ struct mbuf **sc_rx_mbufs;
+ struct mbuf **sc_tx_mbufs;
+
+ enum vio_ctrl_state sc_ctrl_inuse;
+
+ struct timeout sc_tick;
+};
+
+#define VIO_DMAMEM_OFFSET(sc, p) ((caddr_t)(p) - (sc)->sc_dma_kva)
+#define VIO_DMAMEM_SYNC(vsc, sc, p, size, flags) \
+ bus_dmamap_sync((vsc)->sc_dmat, (sc)->sc_dma_map, \
+ VIO_DMAMEM_OFFSET((sc), (p)), (size), (flags))
+#define VIO_DMAMEM_ENQUEUE(sc, vq, slot, p, size, write) \
+ virtio_enqueue_p((vq), (slot), (sc)->sc_dma_map, \
+ VIO_DMAMEM_OFFSET((sc), (p)), (size), (write))
+
+#define VIRTIO_NET_TX_MAXNSEGS 16 /* for larger chains, defrag */
+#define VIRTIO_NET_CTRL_MAC_MAXENTRIES 64 /* for more entries, use ALLMULTI */
+
+/* for now, sc_ctrl_mac_tbl_uc always has 0 entries */
+#define VIO_CTRL_MAC_INFO_SIZE \
+ (2*sizeof(struct virtio_net_ctrl_mac_tbl) + \
+ 0 + VIRTIO_NET_CTRL_MAC_MAXENTRIES * ETHER_ADDR_LEN)
+
+/* cfattach interface functions */
+int vio_match(struct device *, void *, void *);
+void vio_attach(struct device *, struct device *, void *);
+
+/* ifnet interface functions */
+int vio_init(struct ifnet *);
+void vio_stop(struct ifnet *, int);
+void vio_start(struct ifnet *);
+int vio_ioctl(struct ifnet *, u_long, caddr_t);
+void vio_get_lladr(struct arpcom *ac, struct virtio_softc *vsc);
+void vio_put_lladr(struct arpcom *ac, struct virtio_softc *vsc);
+
+/* rx */
+int vio_add_rx_mbuf(struct vio_softc *, int);
+void vio_free_rx_mbuf(struct vio_softc *, int);
+void vio_populate_rx_mbufs(struct vio_softc *);
+int vio_rxeof(struct vio_softc *);
+int vio_rx_intr(struct virtqueue *);
+void vio_rx_drain(struct vio_softc *);
+
+/* tx */
+int vio_tx_intr(struct virtqueue *);
+int vio_txeof(struct virtqueue *);
+void vio_tx_drain(struct vio_softc *);
+int vio_encap(struct vio_softc *, int, struct mbuf *, struct mbuf **);
+void vio_txtick(void *);
+
+/* other control */
+int vio_link_state(struct ifnet *);
+int vio_config_change(struct virtio_softc *);
+int vio_ctrl_rx(struct vio_softc *, int, int);
+int vio_set_rx_filter(struct vio_softc *);
+int vio_iff(struct vio_softc *);
+int vio_media_change(struct ifnet *);
+void vio_media_status(struct ifnet *, struct ifmediareq *);
+int vio_ctrleof(struct virtqueue *);
+void vio_wait_ctrl(struct vio_softc *sc);
+int vio_wait_ctrl_done(struct vio_softc *sc);
+void vio_ctrl_wakeup(struct vio_softc *, enum vio_ctrl_state);
+int vio_alloc_mem(struct vio_softc *);
+int vio_alloc_dmamem(struct vio_softc *);
+void vio_free_dmamem(struct vio_softc *);
+
+int
+vio_match(struct device *parent, void *match, void *aux)
+{
+ struct virtio_softc *va = aux;
+
+ if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
+ return 1;
+
+ return 0;
+}
+
+struct cfattach vio_ca = {
+ sizeof(struct vio_softc), vio_match, vio_attach, NULL
+};
+
+struct cfdriver vio_cd = {
+ NULL, "vio", DV_IFNET
+};
+
+int
+vio_alloc_dmamem(struct vio_softc *sc)
+{
+ struct virtio_softc *vsc = sc->sc_virtio;
+ int nsegs;
+
+ if (bus_dmamap_create(vsc->sc_dmat, sc->sc_dma_size, 1,
+ sc->sc_dma_size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW,
+ &sc->sc_dma_map) != 0)
+ goto err;
+ if (bus_dmamem_alloc(vsc->sc_dmat, sc->sc_dma_size, 16, 0,
+ &sc->sc_dma_seg, 1, &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO) != 0)
+ goto destroy;
+ if (bus_dmamem_map(vsc->sc_dmat, &sc->sc_dma_seg, nsegs,
+ sc->sc_dma_size, &sc->sc_dma_kva, BUS_DMA_NOWAIT) != 0)
+ goto free;
+ if (bus_dmamap_load(vsc->sc_dmat, sc->sc_dma_map, sc->sc_dma_kva,
+ sc->sc_dma_size, NULL, BUS_DMA_NOWAIT) != 0)
+ goto unmap;
+ return (0);
+
+unmap:
+ bus_dmamem_unmap(vsc->sc_dmat, sc->sc_dma_kva, sc->sc_dma_size);
+free:
+ bus_dmamem_free(vsc->sc_dmat, &sc->sc_dma_seg, 1);
+destroy:
+ bus_dmamap_destroy(vsc->sc_dmat, sc->sc_dma_map);
+err:
+ return (1);
+}
+
+void
+vio_free_dmamem(struct vio_softc *sc)
+{
+ struct virtio_softc *vsc = sc->sc_virtio;
+ bus_dmamap_unload(vsc->sc_dmat, sc->sc_dma_map);
+ bus_dmamem_unmap(vsc->sc_dmat, sc->sc_dma_kva, sc->sc_dma_size);
+ bus_dmamem_free(vsc->sc_dmat, &sc->sc_dma_seg, 1);
+ bus_dmamap_destroy(vsc->sc_dmat, sc->sc_dma_map);
+}
+
+/* allocate memory */
+/*
+ * dma memory is used for:
+ * sc_rx_hdrs[slot]: metadata array for received frames (READ)
+ * sc_tx_hdrs[slot]: metadata array for frames to be sent (WRITE)
+ * sc_ctrl_cmd: command to be sent via ctrl vq (WRITE)
+ * sc_ctrl_status: return value for a command via ctrl vq (READ)
+ * sc_ctrl_rx: parameter for a VIRTIO_NET_CTRL_RX class command
+ * (WRITE)
+ * sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
+ * class command (WRITE)
+ * sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
+ * class command (WRITE)
+ * Only one instance of each sc_ctrl_* structure is allocated; they are
+ * protected by sc_ctrl_inuse, which must only be accessed at splnet.
+ */
+/*
+ * dynamically allocated memory is used for:
+ * sc_rx_dmamaps[slot]: bus_dmamap_t array for received payload
+ * sc_tx_dmamaps[slot]: bus_dmamap_t array for sent payload
+ * sc_rx_mbufs[slot]: mbuf pointer array for received frames
+ * sc_tx_mbufs[slot]: mbuf pointer array for sent frames
+ */
+int
+vio_alloc_mem(struct vio_softc *sc)
+{
+ struct virtio_softc *vsc = sc->sc_virtio;
+ int allocsize, r, i;
+ unsigned int offset = 0;
+ int rxqsize, txqsize;
+ caddr_t kva;
+
+ rxqsize = vsc->sc_vqs[0].vq_num;
+ txqsize = vsc->sc_vqs[1].vq_num;
+
+ allocsize = sizeof(struct virtio_net_hdr) * rxqsize;
+ allocsize += sizeof(struct virtio_net_hdr) * txqsize;
+ if (vsc->sc_nvqs == 3) {
+ allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
+ allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
+ allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
+ allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
+ + sizeof(struct virtio_net_ctrl_mac_tbl)
+ + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
+ }
+ sc->sc_dma_size = allocsize;
+
+ if (vio_alloc_dmamem(sc) != 0) {
+ printf("unable to allocate dma region\n");
+ return -1;
+ }
+
+ kva = sc->sc_dma_kva;
+ sc->sc_rx_hdrs = (struct virtio_net_hdr*)kva;
+ offset += sizeof(struct virtio_net_hdr) * rxqsize;
+ sc->sc_tx_hdrs = (struct virtio_net_hdr*)(kva + offset);
+ offset += sizeof(struct virtio_net_hdr) * txqsize;
+ if (vsc->sc_nvqs == 3) {
+ sc->sc_ctrl_cmd = (void*)(kva + offset);
+ offset += sizeof(*sc->sc_ctrl_cmd);
+ sc->sc_ctrl_status = (void*)(kva + offset);
+ offset += sizeof(*sc->sc_ctrl_status);
+ sc->sc_ctrl_rx = (void*)(kva + offset);
+ offset += sizeof(*sc->sc_ctrl_rx);
+ sc->sc_ctrl_mac_tbl_uc = (void*)(kva + offset);
+ offset += sizeof(*sc->sc_ctrl_mac_tbl_uc);
+ /* For now, sc_ctrl_mac_tbl_uc is followed by 0 MAC entries */
+ sc->sc_ctrl_mac_tbl_mc = (void*)(kva + offset);
+ }
+
+ allocsize = (rxqsize + txqsize) *
+ (2 * sizeof(bus_dmamap_t) + sizeof(struct mbuf *));
+ sc->sc_arrays = malloc(allocsize, M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
+ if (sc->sc_arrays == NULL) {
+ printf("unable to allocate mem for dmamaps\n");
+ goto err_hdr;
+ }
+
+ sc->sc_tx_dmamaps = sc->sc_arrays + rxqsize;
+ sc->sc_rx_mbufs = (void*) (sc->sc_tx_dmamaps + txqsize);
+ sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize;
+
+ for (i = 0; i < rxqsize; i++) {
+ r = bus_dmamap_create(vsc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0,
+ BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]);
+ if (r != 0)
+ goto err_reqs;
+ }
+
+ for (i = 0; i < txqsize; i++) {
+ r = bus_dmamap_create(vsc->sc_dmat, ETHER_MAX_LEN,
+ VIRTIO_NET_TX_MAXNSEGS, ETHER_MAX_LEN, 0,
+ BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,
+ &sc->sc_tx_dmamaps[i]);
+ if (r != 0)
+ goto err_reqs;
+ }
+
+ return 0;
+
+err_reqs:
+ printf("dmamap creation failed, error %d\n", r);
+ for (i = 0; i < txqsize; i++) {
+ if (sc->sc_tx_dmamaps[i])
+ bus_dmamap_destroy(vsc->sc_dmat, sc->sc_tx_dmamaps[i]);
+ }
+ for (i = 0; i < rxqsize; i++) {
+		if (sc->sc_rx_dmamaps[i])
+ bus_dmamap_destroy(vsc->sc_dmat, sc->sc_rx_dmamaps[i]);
+ }
+ if (sc->sc_arrays) {
+ free(sc->sc_arrays, M_DEVBUF);
+ sc->sc_arrays = 0;
+ }
+err_hdr:
+ vio_free_dmamem(sc);
+ return -1;
+}
+
+void
+vio_get_lladr(struct arpcom *ac, struct virtio_softc *vsc)
+{
+ int i;
+ for (i = 0; i < ETHER_ADDR_LEN; i++) {
+ ac->ac_enaddr[i] = virtio_read_device_config_1(vsc,
+ VIRTIO_NET_CONFIG_MAC + i);
+ }
+}
+
+void
+vio_put_lladr(struct arpcom *ac, struct virtio_softc *vsc)
+{
+ int i;
+ for (i = 0; i < ETHER_ADDR_LEN; i++) {
+ virtio_write_device_config_1(vsc, VIRTIO_NET_CONFIG_MAC + i,
+ ac->ac_enaddr[i]);
+ }
+}
+
+void
+vio_attach(struct device *parent, struct device *self, void *aux)
+{
+ struct vio_softc *sc = (struct vio_softc *)self;
+ struct virtio_softc *vsc = (struct virtio_softc *)parent;
+ uint32_t features;
+ struct ifnet *ifp = &sc->sc_ac.ac_if;
+
+ if (vsc->sc_child != NULL) {
+ printf("child already attached for %s; something wrong...\n",
+ parent->dv_xname);
+ return;
+ }
+
+ sc->sc_virtio = vsc;
+
+ vsc->sc_child = self;
+ vsc->sc_ipl = IPL_NET;
+ vsc->sc_vqs = &sc->sc_vq[0];
+ vsc->sc_config_change = 0;
+ vsc->sc_intrhand = virtio_vq_intr;
+
+ features = VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS |
+ VIRTIO_NET_F_CTRL_VQ | VIRTIO_NET_F_CTRL_RX;
+	/*
+	 * VIRTIO_F_RING_EVENT_IDX can be switched off by setting flag
+	 * value 2 (bit 1) in the driver flags, see config(8)
+	 */
+ if (!(sc->sc_dev.dv_cfdata->cf_flags & 2) &&
+ !(vsc->sc_dev.dv_cfdata->cf_flags & 2))
+ features |= VIRTIO_F_RING_EVENT_IDX;
+ else
+ printf("RingEventIdx disabled by UKC\n");
+
+ features = virtio_negotiate_features(vsc, features,
+ virtio_net_feature_names);
+ if (features & VIRTIO_NET_F_MAC) {
+ vio_get_lladr(&sc->sc_ac, vsc);
+ } else {
+ ether_fakeaddr(ifp);
+ vio_put_lladr(&sc->sc_ac, vsc);
+ }
+ printf(": address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
+
+ if (virtio_alloc_vq(vsc, &sc->sc_vq[VQRX], 0,
+ MCLBYTES + sizeof(struct virtio_net_hdr), 2, "rx") != 0) {
+ goto err;
+ }
+ vsc->sc_nvqs = 1;
+ sc->sc_vq[VQRX].vq_done = vio_rx_intr;
+ if (virtio_alloc_vq(vsc, &sc->sc_vq[VQTX], 1,
+ (sizeof(struct virtio_net_hdr) + (ETHER_MAX_LEN - ETHER_HDR_LEN)),
+ VIRTIO_NET_TX_MAXNSEGS + 1, "tx") != 0) {
+ goto err;
+ }
+ vsc->sc_nvqs = 2;
+ sc->sc_vq[VQTX].vq_done = vio_tx_intr;
+ virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]);
+ if (features & VIRTIO_F_RING_EVENT_IDX)
+ virtio_postpone_intr_far(&sc->sc_vq[VQTX]);
+ else
+ virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
+ if ((features & VIRTIO_NET_F_CTRL_VQ)
+ && (features & VIRTIO_NET_F_CTRL_RX)) {
+ if (virtio_alloc_vq(vsc, &sc->sc_vq[VQCTL], 2, NBPG, 1,
+ "control") == 0) {
+ sc->sc_vq[VQCTL].vq_done = vio_ctrleof;
+ virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
+ vsc->sc_nvqs = 3;
+ }
+ }
+
+ if (vio_alloc_mem(sc) < 0)
+ goto err;
+
+ strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
+ ifp->if_softc = sc;
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_start = vio_start;
+ ifp->if_ioctl = vio_ioctl;
+ ifp->if_capabilities = 0;
+ IFQ_SET_MAXLEN(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1);
+ IFQ_SET_READY(&ifp->if_snd);
+ ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status);
+ ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+ ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
+ vsc->sc_config_change = vio_config_change;
+ m_clsetwms(ifp, MCLBYTES, 4, sc->sc_vq[VQRX].vq_num);
+ timeout_set(&sc->sc_tick, vio_txtick, &sc->sc_vq[VQTX]);
+
+ if_attach(ifp);
+ ether_ifattach(ifp);
+
+ return;
+
+err:
+ if (vsc->sc_nvqs == 3) {
+ virtio_free_vq(vsc, &sc->sc_vq[2]);
+ vsc->sc_nvqs = 2;
+ }
+ if (vsc->sc_nvqs == 2) {
+ virtio_free_vq(vsc, &sc->sc_vq[1]);
+ vsc->sc_nvqs = 1;
+ }
+ if (vsc->sc_nvqs == 1) {
+ virtio_free_vq(vsc, &sc->sc_vq[0]);
+ vsc->sc_nvqs = 0;
+ }
+ vsc->sc_child = VIRTIO_CHILD_ERROR;
+ return;
+}
+
+/* check link status */
+int
+vio_link_state(struct ifnet *ifp)
+{
+ struct vio_softc *sc = ifp->if_softc;
+ struct virtio_softc *vsc = sc->sc_virtio;
+ int link_state = LINK_STATE_FULL_DUPLEX;
+
+ if (vsc->sc_features & VIRTIO_NET_F_STATUS) {
+ int status = virtio_read_device_config_2(vsc,
+ VIRTIO_NET_CONFIG_STATUS);
+ if (!(status & VIRTIO_NET_S_LINK_UP))
+ link_state = LINK_STATE_DOWN;
+ }
+ if (ifp->if_link_state != link_state) {
+ ifp->if_link_state = link_state;
+ if_link_state_change(ifp);
+ }
+ return 0;
+}
+
+int
+vio_config_change(struct virtio_softc *vsc)
+{
+ struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
+ vio_link_state(&sc->sc_ac.ac_if);
+ return 1;
+}
+
+int
+vio_media_change(struct ifnet *ifp)
+{
+ /* Ignore */
+ return (0);
+}
+
+void
+vio_media_status(struct ifnet *ifp, struct ifmediareq *imr)
+{
+ imr->ifm_active = IFM_ETHER | IFM_AUTO;
+ imr->ifm_status = IFM_AVALID;
+
+ vio_link_state(ifp);
+ if (LINK_STATE_IS_UP(ifp->if_link_state) && ifp->if_flags & IFF_UP)
+ imr->ifm_status |= IFM_ACTIVE|IFM_FDX;
+}
+
+/*
+ * Interface functions for ifnet
+ */
+int
+vio_init(struct ifnet *ifp)
+{
+ struct vio_softc *sc = ifp->if_softc;
+
+ vio_stop(ifp, 0);
+ vio_populate_rx_mbufs(sc);
+ ifp->if_flags |= IFF_RUNNING;
+ ifp->if_flags &= ~IFF_OACTIVE;
+ vio_iff(sc);
+ vio_link_state(ifp);
+ return 0;
+}
+
+void
+vio_stop(struct ifnet *ifp, int disable)
+{
+ struct vio_softc *sc = ifp->if_softc;
+ struct virtio_softc *vsc = sc->sc_virtio;
+
+ timeout_del(&sc->sc_tick);
+ ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
+	/* the only way to stop I/O and DMA is to reset the device */
+ virtio_reset(vsc);
+ vio_rxeof(sc);
+ if (vsc->sc_nvqs >= 3)
+ vio_ctrleof(&sc->sc_vq[VQCTL]);
+ vio_tx_drain(sc);
+ if (disable)
+ vio_rx_drain(sc);
+
+ virtio_reinit_start(vsc);
+ virtio_negotiate_features(vsc, vsc->sc_features, NULL);
+ virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]);
+ virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
+ if (vsc->sc_nvqs >= 3)
+ virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
+ virtio_reinit_end(vsc);
+ if (vsc->sc_nvqs >= 3) {
+ if (sc->sc_ctrl_inuse != FREE)
+ sc->sc_ctrl_inuse = RESET;
+ wakeup(&sc->sc_ctrl_inuse);
+ }
+}
+
+void
+vio_start(struct ifnet *ifp)
+{
+ struct vio_softc *sc = ifp->if_softc;
+ struct virtio_softc *vsc = sc->sc_virtio;
+ struct virtqueue *vq = &sc->sc_vq[VQTX];
+ struct mbuf *m;
+ int queued = 0;
+
+ vio_txeof(vq);
+
+ if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
+ return;
+
+again:
+ for (;;) {
+ int slot, r;
+ struct virtio_net_hdr *hdr;
+
+ IFQ_POLL(&ifp->if_snd, m);
+ if (m == NULL)
+ break;
+
+ r = virtio_enqueue_prep(vq, &slot);
+ if (r == EAGAIN) {
+ ifp->if_flags |= IFF_OACTIVE;
+ break;
+ }
+ if (r != 0)
+ panic("enqueue_prep for a tx buffer: %d", r);
+ r = vio_encap(sc, slot, m, &sc->sc_tx_mbufs[slot]);
+ if (r != 0) {
+ virtio_enqueue_abort(vq, slot);
+ ifp->if_flags |= IFF_OACTIVE;
+ break;
+ }
+ r = virtio_enqueue_reserve(vq, slot,
+ sc->sc_tx_dmamaps[slot]->dm_nsegs + 1);
+ if (r != 0) {
+ bus_dmamap_unload(vsc->sc_dmat,
+ sc->sc_tx_dmamaps[slot]);
+ sc->sc_tx_mbufs[slot] = NULL;
+ ifp->if_flags |= IFF_OACTIVE;
+ break;
+ }
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+
+ hdr = &sc->sc_tx_hdrs[slot];
+ memset(hdr, 0, sizeof(*hdr));
+ bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot], 0,
+ sc->sc_tx_dmamaps[slot]->dm_mapsize, BUS_DMASYNC_PREWRITE);
+ VIO_DMAMEM_SYNC(vsc, sc, hdr, sizeof(*hdr),
+ BUS_DMASYNC_PREWRITE);
+ VIO_DMAMEM_ENQUEUE(sc, vq, slot, hdr, sizeof(*hdr), 1);
+ virtio_enqueue(vq, slot, sc->sc_tx_dmamaps[slot], 1);
+ virtio_enqueue_commit(vsc, vq, slot, 0);
+ queued++;
+#if NBPFILTER > 0
+ if (ifp->if_bpf)
+ bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+#endif
+ }
+ if (ifp->if_flags & IFF_OACTIVE) {
+ int r;
+ if (vsc->sc_features & VIRTIO_F_RING_EVENT_IDX)
+ r = virtio_postpone_intr_smart(&sc->sc_vq[VQTX]);
+ else
+ r = virtio_start_vq_intr(vsc, &sc->sc_vq[VQTX]);
+ if (r) {
+ vio_txeof(vq);
+ goto again;
+ }
+ }
+
+ if (queued > 0) {
+ virtio_notify(vsc, vq);
+ timeout_add_sec(&sc->sc_tick, 1);
+ }
+}
+
+int
+vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct vio_softc *sc = ifp->if_softc;
+ int s, r = 0;
+ struct ifaddr *ifa = (struct ifaddr *)data;
+
+ s = splnet();
+ switch (cmd) {
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ if (!(ifp->if_flags & IFF_RUNNING))
+ vio_init(ifp);
+#ifdef INET
+ if (ifa->ifa_addr->sa_family == AF_INET)
+ arp_ifinit(&sc->sc_ac, ifa);
+#endif
+ break;
+ case SIOCSIFFLAGS:
+ if (ifp->if_flags & IFF_UP) {
+ if (ifp->if_flags & IFF_RUNNING)
+ r = ENETRESET;
+ else
+ vio_init(ifp);
+ } else {
+ if (ifp->if_flags & IFF_RUNNING)
+ vio_stop(ifp, 1);
+ }
+ break;
+ case SIOCGIFMEDIA:
+ case SIOCSIFMEDIA:
+ r = ifmedia_ioctl(ifp, (struct ifreq *)data, &sc->sc_media,
+ cmd);
+ break;
+ default:
+ r = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
+ }
+
+ if (r == ENETRESET) {
+ if (ifp->if_flags & IFF_RUNNING)
+ vio_iff(sc);
+ r = 0;
+ }
+ splx(s);
+ return r;
+}
+
+/*
+ * Receive implementation
+ */
+/* allocate and initialize an mbuf for receive */
+int
+vio_add_rx_mbuf(struct vio_softc *sc, int i)
+{
+ struct mbuf *m;
+ int r;
+
+ m = MCLGETI(NULL, M_DONTWAIT, &sc->sc_ac.ac_if, MCLBYTES);
+ if (m == NULL)
+ return ENOBUFS;
+ sc->sc_rx_mbufs[i] = m;
+ m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
+ r = bus_dmamap_load_mbuf(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i],
+ m, BUS_DMA_READ|BUS_DMA_NOWAIT);
+ if (r) {
+ m_freem(m);
+ sc->sc_rx_mbufs[i] = 0;
+ return r;
+ }
+
+ return 0;
+}
+
+/* free an mbuf for receive */
+void
+vio_free_rx_mbuf(struct vio_softc *sc, int i)
+{
+ bus_dmamap_unload(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i]);
+ m_freem(sc->sc_rx_mbufs[i]);
+ sc->sc_rx_mbufs[i] = NULL;
+}
+
+/* add mbufs for all the empty receive slots */
+void
+vio_populate_rx_mbufs(struct vio_softc *sc)
+{
+ struct virtio_softc *vsc = sc->sc_virtio;
+ int i, r, ndone = 0;
+ struct virtqueue *vq = &sc->sc_vq[VQRX];
+
+ for (i = 0; i < vq->vq_num; i++) {
+ int slot;
+ struct virtio_net_hdr *hdr;
+ r = virtio_enqueue_prep(vq, &slot);
+ if (r == EAGAIN)
+ break;
+ if (r != 0)
+ panic("enqueue_prep for rx buffers: %d", r);
+ if (sc->sc_rx_mbufs[slot] == NULL) {
+ r = vio_add_rx_mbuf(sc, slot);
+ if (r != 0) {
+ virtio_enqueue_abort(vq, slot);
+ break;
+ }
+ }
+ r = virtio_enqueue_reserve(vq, slot,
+ sc->sc_rx_dmamaps[slot]->dm_nsegs + 1);
+ if (r != 0) {
+ vio_free_rx_mbuf(sc, slot);
+ break;
+ }
+ hdr = &sc->sc_rx_hdrs[slot];
+ VIO_DMAMEM_SYNC(vsc, sc, hdr, sizeof(*hdr),
+ BUS_DMASYNC_PREREAD);
+ bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot], 0,
+ MCLBYTES, BUS_DMASYNC_PREREAD);
+ VIO_DMAMEM_ENQUEUE(sc, vq, slot, hdr, sizeof(*hdr), 0);
+ virtio_enqueue(vq, slot, sc->sc_rx_dmamaps[slot], 0);
+ virtio_enqueue_commit(vsc, vq, slot, 0);
+ ndone++;
+ }
+ if (ndone > 0)
+ virtio_notify(vsc, vq);
+}
+
+/* dequeue received packets */
+int
+vio_rxeof(struct vio_softc *sc)
+{
+ struct virtio_softc *vsc = sc->sc_virtio;
+ struct virtqueue *vq = &sc->sc_vq[VQRX];
+ struct ifnet *ifp = &sc->sc_ac.ac_if;
+ struct mbuf *m;
+ int r = 0;
+ int slot, len;
+
+ while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
+ struct virtio_net_hdr *hdr = &sc->sc_rx_hdrs[slot];
+ len -= sizeof(struct virtio_net_hdr);
+ r = 1;
+ VIO_DMAMEM_SYNC(vsc, sc, hdr, sizeof(*hdr),
+ BUS_DMASYNC_POSTREAD);
+ bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot], 0,
+ MCLBYTES, BUS_DMASYNC_POSTREAD);
+ m = sc->sc_rx_mbufs[slot];
+ KASSERT(m != NULL);
+ bus_dmamap_unload(vsc->sc_dmat, sc->sc_rx_dmamaps[slot]);
+ sc->sc_rx_mbufs[slot] = 0;
+ virtio_dequeue_commit(vq, slot);
+ m->m_pkthdr.rcvif = ifp;
+ m->m_len = m->m_pkthdr.len = len;
+ m->m_pkthdr.csum_flags = 0;
+ ifp->if_ipackets++;
+#if NBPFILTER > 0
+ if (ifp->if_bpf)
+ bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN);
+#endif
+ ether_input_mbuf(ifp, m);
+ }
+ return r;
+}
+
+int
+vio_rx_intr(struct virtqueue *vq)
+{
+ struct virtio_softc *vsc = vq->vq_owner;
+ struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
+ int r, sum = 0;
+
+again:
+ r = vio_rxeof(sc);
+ sum += r;
+ if (r) {
+ vio_populate_rx_mbufs(sc);
+ /* set used event index to the next slot */
+ if (vsc->sc_features & VIRTIO_F_RING_EVENT_IDX) {
+ if (virtio_start_vq_intr(vq->vq_owner, vq))
+ goto again;
+ }
+ }
+
+ return sum;
+}
+
+/* free all the mbufs; called from if_stop(disable) */
+void
+vio_rx_drain(struct vio_softc *sc)
+{
+ struct virtqueue *vq = &sc->sc_vq[VQRX];
+ int i;
+
+ for (i = 0; i < vq->vq_num; i++) {
+ if (sc->sc_rx_mbufs[i] == NULL)
+ continue;
+ vio_free_rx_mbuf(sc, i);
+ }
+}
+
+/*
+ * Transmission implementation
+ */
+/* actual transmission is done in if_start */
+/* tx interrupt; dequeue and free mbufs */
+/*
+ * tx interrupt is actually disabled unless the tx queue is full, i.e.
+ * IFF_OACTIVE is set. vio_txtick is used to make sure that mbufs
+ * are dequeued and freed even if no further transfer happens.
+ */
+int
+vio_tx_intr(struct virtqueue *vq)
+{
+ struct virtio_softc *vsc = vq->vq_owner;
+ struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
+ struct ifnet *ifp = &sc->sc_ac.ac_if;
+ int r;
+
+ r = vio_txeof(vq);
+ if (!IFQ_IS_EMPTY(&ifp->if_snd))
+ vio_start(ifp);
+ return r;
+}
+
+void
+vio_txtick(void *arg)
+{
+ struct virtqueue *vq = arg;
+ int s = splnet();
+ vio_tx_intr(vq);
+ splx(s);
+}
+
+int
+vio_txeof(struct virtqueue *vq)
+{
+ struct virtio_softc *vsc = vq->vq_owner;
+ struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
+ struct ifnet *ifp = &sc->sc_ac.ac_if;
+ struct mbuf *m;
+ int r = 0;
+ int slot, len;
+
+ while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
+ struct virtio_net_hdr *hdr = &sc->sc_tx_hdrs[slot];
+ r++;
+ VIO_DMAMEM_SYNC(vsc, sc, hdr, sizeof(*hdr),
+ BUS_DMASYNC_POSTWRITE);
+ bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot], 0,
+ sc->sc_tx_dmamaps[slot]->dm_mapsize,
+ BUS_DMASYNC_POSTWRITE);
+ m = sc->sc_tx_mbufs[slot];
+ bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[slot]);
+ sc->sc_tx_mbufs[slot] = 0;
+ virtio_dequeue_commit(vq, slot);
+ ifp->if_opackets++;
+ m_freem(m);
+ }
+
+ if (r) {
+ ifp->if_flags &= ~IFF_OACTIVE;
+ virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
+ }
+ if (vq->vq_used_idx == vq->vq_avail_idx)
+ timeout_del(&sc->sc_tick);
+ else if (r)
+ timeout_add_sec(&sc->sc_tick, 1);
+ return r;
+}
+
+int
+vio_encap(struct vio_softc *sc, int slot, struct mbuf *m,
+ struct mbuf **mnew)
+{
+ struct virtio_softc *vsc = sc->sc_virtio;
+ bus_dmamap_t dmap= sc->sc_tx_dmamaps[slot];
+ struct mbuf *m0 = NULL;
+ int r;
+
+ r = bus_dmamap_load_mbuf(vsc->sc_dmat, dmap, m,
+ BUS_DMA_WRITE|BUS_DMA_NOWAIT);
+ if (r == 0) {
+ *mnew = m;
+ return r;
+ }
+ if (r != EFBIG)
+ return r;
+ /* EFBIG: mbuf chain is too fragmented */
+ MGETHDR(m0, M_DONTWAIT, MT_DATA);
+ if (m0 == NULL)
+ return ENOBUFS;
+ if (m->m_pkthdr.len > MHLEN) {
+ MCLGETI(m0, M_DONTWAIT, NULL, m->m_pkthdr.len);
+ if (!(m0->m_flags & M_EXT)) {
+ m_freem(m0);
+ return ENOBUFS;
+ }
+ }
+ m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t));
+ m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len;
+ r = bus_dmamap_load_mbuf(vsc->sc_dmat, dmap, m0,
+ BUS_DMA_NOWAIT|BUS_DMA_WRITE);
+ if (r != 0) {
+ m_freem(m0);
+ printf("%s: tx dmamap load error %d\n", sc->sc_dev.dv_xname,
+ r);
+ return ENOBUFS;
+ }
+ m_freem(m);
+ *mnew = m0;
+ return 0;
+}
+
+/* free all the mbufs already put on vq; called from if_stop(disable) */
+void
+vio_tx_drain(struct vio_softc *sc)
+{
+ struct virtio_softc *vsc = sc->sc_virtio;
+ struct virtqueue *vq = &sc->sc_vq[VQTX];
+ int i;
+
+ for (i = 0; i < vq->vq_num; i++) {
+ if (sc->sc_tx_mbufs[i] == NULL)
+ continue;
+ bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[i]);
+ m_freem(sc->sc_tx_mbufs[i]);
+ sc->sc_tx_mbufs[i] = NULL;
+ }
+}
+
+/*
+ * Control vq
+ */
+/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
+int
+vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
+{
+ struct virtio_softc *vsc = sc->sc_virtio;
+ struct virtqueue *vq = &sc->sc_vq[VQCTL];
+ int r, slot;
+
+ if (vsc->sc_nvqs < 3)
+ return ENOTSUP;
+
+ splassert(IPL_NET);
+ vio_wait_ctrl(sc);
+
+ sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_RX;
+ sc->sc_ctrl_cmd->command = cmd;
+ sc->sc_ctrl_rx->onoff = onoff;
+
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
+ sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE);
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_rx,
+ sizeof(*sc->sc_ctrl_rx), BUS_DMASYNC_PREWRITE);
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
+ sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD);
+
+ r = virtio_enqueue_prep(vq, &slot);
+ if (r != 0)
+ panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
+ r = virtio_enqueue_reserve(vq, slot, 3);
+ if (r != 0)
+ panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
+ VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd,
+ sizeof(*sc->sc_ctrl_cmd), 1);
+ VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_rx,
+ sizeof(*sc->sc_ctrl_rx), 1);
+ VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
+ sizeof(*sc->sc_ctrl_status), 0);
+ virtio_enqueue_commit(vsc, vq, slot, 1);
+
+ if (vio_wait_ctrl_done(sc)) {
+ r = EIO;
+ goto out;
+ }
+
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
+ sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_rx,
+ sizeof(*sc->sc_ctrl_rx), BUS_DMASYNC_POSTWRITE);
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
+ sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
+
+ if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
+ r = 0;
+ } else {
+ printf("%s: ctrl cmd %d failed\n", sc->sc_dev.dv_xname, cmd);
+ r = EIO;
+ }
+
+ DBGPRINT("cmd %d %d: %d", cmd, (int)onoff, r);
+out:
+ vio_ctrl_wakeup(sc, FREE);
+ return r;
+}
+
+void
+vio_wait_ctrl(struct vio_softc *sc)
+{
+ while (sc->sc_ctrl_inuse != FREE)
+ tsleep(&sc->sc_ctrl_inuse, IPL_NET, "vio_wait", 0);
+ sc->sc_ctrl_inuse = INUSE;
+}
+
+int
+vio_wait_ctrl_done(struct vio_softc *sc)
+{
+ int r = 0;
+	while (sc->sc_ctrl_inuse != DONE) {
+		if (sc->sc_ctrl_inuse == RESET) {
+			r = 1;
+			break;
+		}
+		tsleep(&sc->sc_ctrl_inuse, IPL_NET, "vio_wait", 0);
+	}
+ return r;
+}
+
+void
+vio_ctrl_wakeup(struct vio_softc *sc, enum vio_ctrl_state new)
+{
+ sc->sc_ctrl_inuse = new;
+ wakeup(&sc->sc_ctrl_inuse);
+}
+
+int
+vio_ctrleof(struct virtqueue *vq)
+{
+ struct virtio_softc *vsc = vq->vq_owner;
+ struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
+ int r = 0, ret, slot;
+
+again:
+ ret = virtio_dequeue(vsc, vq, &slot, NULL);
+ if (ret == ENOENT)
+ return r;
+ virtio_dequeue_commit(vq, slot);
+ r++;
+ vio_ctrl_wakeup(sc, DONE);
+ if (virtio_start_vq_intr(vsc, vq))
+ goto again;
+
+ return r;
+}
+
+/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
+int
+vio_set_rx_filter(struct vio_softc *sc)
+{
+ /* filter already set in sc_ctrl_mac_tbl */
+ struct virtio_softc *vsc = sc->sc_virtio;
+ struct virtqueue *vq = &sc->sc_vq[VQCTL];
+ int r, slot;
+
+ splassert(IPL_NET);
+
+ if (vsc->sc_nvqs < 3)
+ return ENOTSUP;
+
+ vio_wait_ctrl(sc);
+
+ sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_MAC;
+ sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_MAC_TABLE_SET;
+
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
+ sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE);
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mac_info,
+ VIO_CTRL_MAC_INFO_SIZE, BUS_DMASYNC_PREWRITE);
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
+ sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD);
+
+ r = virtio_enqueue_prep(vq, &slot);
+ if (r != 0)
+ panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
+ r = virtio_enqueue_reserve(vq, slot, 4);
+ if (r != 0)
+ panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
+ VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd,
+ sizeof(*sc->sc_ctrl_cmd), 1);
+ VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_mac_tbl_uc,
+ sizeof(*sc->sc_ctrl_mac_tbl_uc) +
+ sc->sc_ctrl_mac_tbl_uc->nentries * ETHER_ADDR_LEN, 1);
+ VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_mac_tbl_mc,
+ sizeof(*sc->sc_ctrl_mac_tbl_mc) +
+ sc->sc_ctrl_mac_tbl_mc->nentries * ETHER_ADDR_LEN, 1);
+ VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
+ sizeof(*sc->sc_ctrl_status), 0);
+ virtio_enqueue_commit(vsc, vq, slot, 1);
+
+ if (vio_wait_ctrl_done(sc)) {
+ r = EIO;
+ goto out;
+ }
+
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
+ sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mac_info,
+ VIO_CTRL_MAC_INFO_SIZE, BUS_DMASYNC_POSTWRITE);
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
+ sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
+
+ if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
+ r = 0;
+ } else {
+ printf("%s: failed setting rx filter\n", sc->sc_dev.dv_xname);
+ r = EIO;
+ }
+
+out:
+ vio_ctrl_wakeup(sc, FREE);
+ return r;
+}
+
+/*
+ * If IFF_PROMISC is requested, set promiscuous mode.
+ * If the multicast filter is small enough (<= MAXENTRIES), set the rx filter.
+ * If the multicast filter is too large, use ALLMULTI.
+ */
+/*
+ * If setting the rx filter fails, fall back to ALLMULTI.
+ * If ALLMULTI fails, fall back to PROMISC.
+ */
+int
+vio_iff(struct vio_softc *sc)
+{
+ struct virtio_softc *vsc = sc->sc_virtio;
+ struct ifnet *ifp = &sc->sc_ac.ac_if;
+ struct ether_multi *enm;
+ struct ether_multistep step;
+ int nentries = 0;
+ int promisc = 0, allmulti = 0, rxfilter = 0;
+ int r;
+
+ splassert(IPL_NET);
+
+ if (vsc->sc_nvqs < 3) {
+ /* no ctrl vq; always promisc */
+ ifp->if_flags |= IFF_PROMISC;
+ return 0;
+ }
+
+ if (ifp->if_flags & IFF_PROMISC) {
+ promisc = 1;
+ goto set;
+ }
+
+ ETHER_FIRST_MULTI(step, &sc->sc_ac, enm);
+ while (enm != NULL) {
+ if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
+ allmulti = 1;
+ goto set;
+ }
+ if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
+ allmulti = 1;
+ goto set;
+ }
+ memcpy(sc->sc_ctrl_mac_tbl_mc->macs[nentries], enm->enm_addrlo,
+ ETHER_ADDR_LEN);
+ ETHER_NEXT_MULTI(step, enm);
+ nentries++;
+ }
+ rxfilter = 1;
+
+set:
+ if (rxfilter) {
+ sc->sc_ctrl_mac_tbl_uc->nentries = 0;
+ sc->sc_ctrl_mac_tbl_mc->nentries = nentries;
+ r = vio_set_rx_filter(sc);
+ if (r != 0) {
+ rxfilter = 0;
+ allmulti = 1; /* fallback */
+ }
+ } else {
+ /* remove rx filter */
+ sc->sc_ctrl_mac_tbl_uc->nentries = 0;
+ sc->sc_ctrl_mac_tbl_mc->nentries = 0;
+ r = vio_set_rx_filter(sc);
+ /* what to do on failure? */
+ }
+ if (allmulti) {
+ r = vio_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, 1);
+ if (r != 0) {
+ allmulti = 0;
+ promisc = 1; /* fallback */
+ }
+ } else {
+ r = vio_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, 0);
+ /* what to do on failure? */
+ }
+
+ return vio_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, promisc);
+}
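
Every transmit, receive, and control transfer above follows the same
virtqueue lifecycle: claim a slot, reserve descriptors, chain the buffers,
commit, notify. A distilled sketch using the virtio_* API this commit
introduces (enqueue_one() is not a function from the driver; it assumes an
already-loaded dmamap and mirrors the error handling seen in vio_start()):

	int
	enqueue_one(struct virtio_softc *vsc, struct virtqueue *vq,
	    bus_dmamap_t map, int write)
	{
		int r, slot;

		r = virtio_enqueue_prep(vq, &slot);	/* claim a free slot */
		if (r != 0)
			return r;		/* EAGAIN: the ring is full */
		r = virtio_enqueue_reserve(vq, slot, map->dm_nsegs);
		if (r != 0)
			return r;	/* as in vio_start(): no abort here */
		virtio_enqueue(vq, slot, map, write);	/* chain descriptors */
		virtio_enqueue_commit(vsc, vq, slot, 1); /* publish + notify */
		return 0;
	}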
diff --git a/sys/dev/pci/vioblk.c b/sys/dev/pci/vioblk.c
new file mode 100644
index 00000000000..c87278b8a7d
--- /dev/null
+++ b/sys/dev/pci/vioblk.c
@@ -0,0 +1,619 @@
+/*
+ * Copyright (c) 2012 Stefan Fritsch.
+ * Copyright (c) 2010 Minoura Makoto.
+ * Copyright (c) 1998, 2001 Manuel Bouyer.
+ * All rights reserved.
+ *
+ * This code is based in part on the NetBSD ld_virtio driver and the
+ * OpenBSD vdsk driver.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, 2011 Mark Kettenis
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <machine/bus.h>
+
+#include <sys/device.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/virtioreg.h>
+#include <dev/pci/virtiovar.h>
+#include <dev/pci/vioblkreg.h>
+
+#include <scsi/scsi_all.h>
+#include <scsi/scsi_disk.h>
+#include <scsi/scsiconf.h>
+
+#define VIOBLK_DONE -1
+
+struct virtio_feature_name vioblk_feature_names[] = {
+ { VIRTIO_BLK_F_BARRIER, "Barrier" },
+ { VIRTIO_BLK_F_SIZE_MAX, "SizeMax" },
+ { VIRTIO_BLK_F_SEG_MAX, "SegMax" },
+ { VIRTIO_BLK_F_GEOMETRY, "Geometry" },
+ { VIRTIO_BLK_F_RO, "RO" },
+ { VIRTIO_BLK_F_BLK_SIZE, "BlkSize" },
+ { VIRTIO_BLK_F_SCSI, "SCSI" },
+ { VIRTIO_BLK_F_FLUSH, "Flush" },
+ { VIRTIO_BLK_F_TOPOLOGY, "Topology" },
+ { 0, NULL }
+};
+
+struct virtio_blk_req {
+ struct virtio_blk_req_hdr vr_hdr;
+ uint8_t vr_status;
+ struct scsi_xfer *vr_xs;
+ int vr_len;
+ bus_dmamap_t vr_cmdsts;
+ bus_dmamap_t vr_payload;
+};
+
+struct vioblk_softc {
+ struct device sc_dev;
+ struct virtio_softc *sc_virtio;
+
+ struct virtqueue sc_vq[1];
+ struct virtio_blk_req *sc_reqs;
+ bus_dma_segment_t sc_reqs_segs[1];
+
+ struct scsi_adapter sc_switch;
+ struct scsi_link sc_link;
+
+ int sc_notify_on_empty;
+
+ uint32_t sc_queued;
+
+ /* device configuration */
+ uint64_t sc_capacity;
+ uint32_t sc_xfer_max;
+ uint32_t sc_seg_max;
+};
+
+int vioblk_match(struct device *, void *, void *);
+void vioblk_attach(struct device *, struct device *, void *);
+int vioblk_alloc_reqs(struct vioblk_softc *, int);
+int vioblk_vq_done(struct virtqueue *);
+void vioblk_vq_done1(struct vioblk_softc *, struct virtio_softc *,
+ struct virtqueue *, int);
+void vioblk_minphys(struct buf *, struct scsi_link *);
+
+void vioblk_scsi_cmd(struct scsi_xfer *);
+int vioblk_dev_probe(struct scsi_link *);
+void vioblk_dev_free(struct scsi_link *);
+
+void vioblk_scsi_inq(struct scsi_xfer *);
+void vioblk_scsi_capacity(struct scsi_xfer *);
+void vioblk_scsi_capacity16(struct scsi_xfer *);
+void vioblk_scsi_done(struct scsi_xfer *, int);
+
+struct cfattach vioblk_ca = {
+ sizeof(struct vioblk_softc),
+ vioblk_match,
+ vioblk_attach,
+ NULL
+};
+
+struct cfdriver vioblk_cd = {
+ NULL, "vioblk", DV_DULL
+};
+
+
+int
+vioblk_match(struct device *parent, void *match, void *aux)
+{
+ struct virtio_softc *va = aux;
+ if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_BLOCK)
+ return 1;
+ return 0;
+}
+
+#if VIRTIO_DEBUG > 0
+#define DBGPRINT(fmt, args...) printf("%s: " fmt "\n", __func__, ## args)
+#else
+#define DBGPRINT(fmt, args...) do {} while (0)
+#endif
+
+void
+vioblk_minphys(struct buf *bp, struct scsi_link *sl)
+{
+ struct vioblk_softc *sc = sl->adapter_softc;
+ if (bp->b_bcount > sc->sc_xfer_max)
+ bp->b_bcount = sc->sc_xfer_max;
+}
+
+void
+vioblk_attach(struct device *parent, struct device *self, void *aux)
+{
+ struct vioblk_softc *sc = (struct vioblk_softc *)self;
+ struct virtio_softc *vsc = (struct virtio_softc *)parent;
+ struct scsibus_attach_args saa;
+ uint32_t features;
+ int qsize;
+
+ vsc->sc_vqs = &sc->sc_vq[0];
+ vsc->sc_nvqs = 1;
+ vsc->sc_config_change = 0;
+ if (vsc->sc_child)
+ panic("already attached to something else");
+ vsc->sc_child = self;
+ vsc->sc_ipl = IPL_BIO;
+ vsc->sc_intrhand = virtio_vq_intr;
+ sc->sc_virtio = vsc;
+
+ features = virtio_negotiate_features(vsc,
+ (VIRTIO_BLK_F_RO | VIRTIO_F_NOTIFY_ON_EMPTY |
+ VIRTIO_BLK_F_SIZE_MAX | VIRTIO_BLK_F_SEG_MAX |
+ VIRTIO_BLK_F_FLUSH),
+ vioblk_feature_names);
+
+ if (features & VIRTIO_BLK_F_SIZE_MAX) {
+ uint32_t size_max = virtio_read_device_config_4(vsc,
+ VIRTIO_BLK_CONFIG_SIZE_MAX);
+ if (size_max < NBPG) {
+ printf("\nMax segment size %u too low\n", size_max);
+ goto err;
+ }
+ }
+
+ if (features & VIRTIO_BLK_F_SEG_MAX) {
+ sc->sc_seg_max = virtio_read_device_config_4(vsc,
+ VIRTIO_BLK_CONFIG_SEG_MAX);
+ sc->sc_seg_max = MIN(sc->sc_seg_max, MAXPHYS/NBPG + 2);
+ } else {
+ sc->sc_seg_max = MAXPHYS/NBPG + 2;
+ }
+ sc->sc_xfer_max = (sc->sc_seg_max - 2) * NBPG;
+
+ sc->sc_capacity = virtio_read_device_config_8(vsc,
+ VIRTIO_BLK_CONFIG_CAPACITY);
+
+ if (virtio_alloc_vq(vsc, &sc->sc_vq[0], 0, sc->sc_xfer_max,
+ sc->sc_seg_max, "I/O request") != 0) {
+ printf("\nCan't alloc virtqueue\n");
+ goto err;
+ }
+ qsize = sc->sc_vq[0].vq_num;
+ sc->sc_vq[0].vq_done = vioblk_vq_done;
+ if (vioblk_alloc_reqs(sc, qsize) < 0) {
+ printf("\nCan't alloc reqs\n");
+ goto err;
+ }
+
+ if (features & VIRTIO_F_NOTIFY_ON_EMPTY) {
+ virtio_stop_vq_intr(vsc, &sc->sc_vq[0]);
+ sc->sc_notify_on_empty = 1;
+	} else {
+ sc->sc_notify_on_empty = 0;
+ }
+
+ sc->sc_queued = 0;
+
+ sc->sc_switch.scsi_cmd = vioblk_scsi_cmd;
+ sc->sc_switch.scsi_minphys = vioblk_minphys;
+ sc->sc_switch.dev_probe = vioblk_dev_probe;
+ sc->sc_switch.dev_free = vioblk_dev_free;
+
+ sc->sc_link.adapter = &sc->sc_switch;
+ sc->sc_link.adapter_softc = self;
+ sc->sc_link.adapter_buswidth = 2;
+ sc->sc_link.luns = 1;
+ sc->sc_link.adapter_target = 2;
+ sc->sc_link.openings = qsize;
+ DBGPRINT("; qsize: %d seg_max: %d", qsize, sc->sc_seg_max);
+ if (features & VIRTIO_BLK_F_RO)
+ sc->sc_link.flags |= SDEV_READONLY;
+
+ bzero(&saa, sizeof(saa));
+ saa.saa_sc_link = &sc->sc_link;
+ printf("\n");
+ config_found(self, &saa, scsiprint);
+
+ return;
+err:
+ vsc->sc_child = VIRTIO_CHILD_ERROR;
+ return;
+}
+
+int
+vioblk_vq_done(struct virtqueue *vq)
+{
+ struct virtio_softc *vsc = vq->vq_owner;
+ struct vioblk_softc *sc = (struct vioblk_softc *)vsc->sc_child;
+ int slot;
+ int ret = 0;
+
+ if (!sc->sc_notify_on_empty)
+ virtio_stop_vq_intr(vsc, vq);
+ for (;;) {
+ if (virtio_dequeue(vsc, vq, &slot, NULL) != 0) {
+ if (sc->sc_notify_on_empty)
+ break;
+ virtio_start_vq_intr(vsc, vq);
+ if (virtio_dequeue(vsc, vq, &slot, NULL) != 0)
+ break;
+ }
+ vioblk_vq_done1(sc, vsc, vq, slot);
+ ret = 1;
+ }
+ return ret;
+}
+
+void
+vioblk_vq_done1(struct vioblk_softc *sc, struct virtio_softc *vsc,
+ struct virtqueue *vq, int slot)
+{
+ struct virtio_blk_req *vr = &sc->sc_reqs[slot];
+ struct scsi_xfer *xs = vr->vr_xs;
+ KASSERT(vr->vr_len != VIOBLK_DONE);
+ bus_dmamap_sync(vsc->sc_dmat, vr->vr_cmdsts, 0,
+ sizeof(struct virtio_blk_req_hdr), BUS_DMASYNC_POSTWRITE);
+ if (vr->vr_hdr.type != VIRTIO_BLK_T_FLUSH) {
+ bus_dmamap_sync(vsc->sc_dmat, vr->vr_payload, 0, vr->vr_len,
+ (vr->vr_hdr.type == VIRTIO_BLK_T_IN) ?
+ BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);
+ }
+ bus_dmamap_sync(vsc->sc_dmat, vr->vr_cmdsts,
+ sizeof(struct virtio_blk_req_hdr), sizeof(uint8_t),
+ BUS_DMASYNC_POSTREAD);
+
+ if (vr->vr_status != VIRTIO_BLK_S_OK) {
+ DBGPRINT("EIO");
+ xs->error = XS_DRIVER_STUFFUP;
+ xs->resid = xs->datalen;
+ } else {
+ xs->error = XS_NOERROR;
+ xs->resid = xs->datalen - vr->vr_len;
+ }
+ scsi_done(xs);
+ vr->vr_len = VIOBLK_DONE;
+
+ virtio_dequeue_commit(vq, slot);
+}
+
+void
+vioblk_scsi_cmd(struct scsi_xfer *xs)
+{
+ struct scsi_rw *rw;
+ struct scsi_rw_big *rwb;
+ u_int64_t lba = 0;
+ u_int32_t sector_count;
+ uint8_t operation;
+ int isread;
+
+ switch (xs->cmd->opcode) {
+ case READ_BIG:
+ case READ_COMMAND:
+ operation = VIRTIO_BLK_T_IN;
+ isread = 1;
+ break;
+ case WRITE_BIG:
+ case WRITE_COMMAND:
+ operation = VIRTIO_BLK_T_OUT;
+ isread = 0;
+ break;
+
+ case SYNCHRONIZE_CACHE:
+ operation = VIRTIO_BLK_T_FLUSH;
+ break;
+
+ case INQUIRY:
+ vioblk_scsi_inq(xs);
+ return;
+ case READ_CAPACITY:
+ vioblk_scsi_capacity(xs);
+ return;
+ case READ_CAPACITY_16:
+ vioblk_scsi_capacity16(xs);
+ return;
+
+ case TEST_UNIT_READY:
+ case START_STOP:
+ case PREVENT_ALLOW:
+ vioblk_scsi_done(xs, XS_NOERROR);
+ return;
+
+ default:
+ printf("%s cmd 0x%02x\n", __func__, xs->cmd->opcode);
+ case MODE_SENSE:
+ case MODE_SENSE_BIG:
+ case REPORT_LUNS:
+ vioblk_scsi_done(xs, XS_DRIVER_STUFFUP);
+ return;
+ }
+
+ if (xs->cmdlen == 6) {
+ rw = (struct scsi_rw *)xs->cmd;
+ lba = _3btol(rw->addr) & (SRW_TOPADDR << 16 | 0xffff);
+ sector_count = rw->length ? rw->length : 0x100;
+ } else {
+ rwb = (struct scsi_rw_big *)xs->cmd;
+ lba = _4btol(rwb->addr);
+ sector_count = _2btol(rwb->length);
+ }
+
+{
+ struct vioblk_softc *sc = xs->sc_link->adapter_softc;
+ struct virtqueue *vq = &sc->sc_vq[0];
+ struct virtio_blk_req *vr;
+ struct virtio_softc *vsc = sc->sc_virtio;
+ int len, s;
+ int timeout;
+ int slot, ret, nsegs;
+
+ s = splbio();
+ ret = virtio_enqueue_prep(vq, &slot);
+ if (ret) {
+ DBGPRINT("virtio_enqueue_prep: %d, vq_num: %d, sc_queued: %d",
+ ret, vq->vq_num, sc->sc_queued);
+ vioblk_scsi_done(xs, XS_NO_CCB);
+ splx(s);
+ return;
+ }
+ vr = &sc->sc_reqs[slot];
+ if (operation != VIRTIO_BLK_T_FLUSH) {
+ len = MIN(xs->datalen, sector_count * VIRTIO_BLK_SECTOR_SIZE);
+ ret = bus_dmamap_load(vsc->sc_dmat, vr->vr_payload,
+ xs->data, len, NULL,
+ ((isread ? BUS_DMA_READ : BUS_DMA_WRITE) |
+ BUS_DMA_NOWAIT));
+ if (ret) {
+ DBGPRINT("bus_dmamap_load: %d", ret);
+ goto out_enq_abort;
+ }
+ nsegs = vr->vr_payload->dm_nsegs + 2;
+ } else {
+ len = 0;
+ nsegs = 2;
+ }
+ ret = virtio_enqueue_reserve(vq, slot, nsegs);
+ if (ret) {
+ DBGPRINT("virtio_enqueue_reserve: %d", ret);
+ bus_dmamap_unload(vsc->sc_dmat, vr->vr_payload);
+ goto out_done;
+ }
+ vr->vr_xs = xs;
+ vr->vr_hdr.type = operation;
+ vr->vr_hdr.ioprio = 0;
+ vr->vr_hdr.sector = lba;
+ vr->vr_len = len;
+
+ bus_dmamap_sync(vsc->sc_dmat, vr->vr_cmdsts,
+ 0, sizeof(struct virtio_blk_req_hdr),
+ BUS_DMASYNC_PREWRITE);
+ if (operation != VIRTIO_BLK_T_FLUSH) {
+ bus_dmamap_sync(vsc->sc_dmat, vr->vr_payload, 0, len,
+ isread ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);
+ }
+ bus_dmamap_sync(vsc->sc_dmat, vr->vr_cmdsts,
+ offsetof(struct virtio_blk_req, vr_status), sizeof(uint8_t),
+ BUS_DMASYNC_PREREAD);
+
+ virtio_enqueue_p(vq, slot, vr->vr_cmdsts, 0,
+ sizeof(struct virtio_blk_req_hdr), 1);
+ if (operation != VIRTIO_BLK_T_FLUSH)
+ virtio_enqueue(vq, slot, vr->vr_payload, !isread);
+ virtio_enqueue_p(vq, slot, vr->vr_cmdsts,
+ offsetof(struct virtio_blk_req, vr_status), sizeof(uint8_t), 0);
+ virtio_enqueue_commit(vsc, vq, slot, 1);
+ sc->sc_queued++;
+
+ if (!ISSET(xs->flags, SCSI_POLL)) {
+ /* check if some xfers are done: */
+ if (sc->sc_queued > 1)
+ vioblk_vq_done(vq);
+ splx(s);
+ return;
+ }
+
+ timeout = 1000;
+ do {
+ if (vsc->sc_ops->intr(vsc) && vr->vr_len == VIOBLK_DONE)
+ break;
+
+ delay(1000);
+ } while(--timeout > 0);
+ splx(s);
+ return;
+
+out_enq_abort:
+ virtio_enqueue_abort(vq, slot);
+out_done:
+ vioblk_scsi_done(xs, XS_NO_CCB);
+ vr->vr_len = VIOBLK_DONE;
+ splx(s);
+}
+}
+
+void
+vioblk_scsi_inq(struct scsi_xfer *xs)
+{
+ struct scsi_inquiry *inq = (struct scsi_inquiry *)xs->cmd;
+ struct scsi_inquiry_data inqd;
+
+ if (ISSET(inq->flags, SI_EVPD)) {
+ vioblk_scsi_done(xs, XS_DRIVER_STUFFUP);
+ return;
+ }
+
+ bzero(&inqd, sizeof(inqd));
+
+ inqd.device = T_DIRECT;
+ inqd.version = 0x05; /* SPC-3 */
+ inqd.response_format = 2;
+ inqd.additional_length = 32;
+ inqd.flags |= SID_CmdQue;
+ bcopy("VirtIO ", inqd.vendor, sizeof(inqd.vendor));
+ bcopy("Block Device ", inqd.product, sizeof(inqd.product));
+
+ bcopy(&inqd, xs->data, MIN(sizeof(inqd), xs->datalen));
+ vioblk_scsi_done(xs, XS_NOERROR);
+}
+
+void
+vioblk_scsi_capacity(struct scsi_xfer *xs)
+{
+ struct vioblk_softc *sc = xs->sc_link->adapter_softc;
+ struct scsi_read_cap_data rcd;
+ uint64_t capacity;
+
+ bzero(&rcd, sizeof(rcd));
+
+ capacity = sc->sc_capacity - 1;
+ if (capacity > 0xffffffff)
+ capacity = 0xffffffff;
+
+ _lto4b(capacity, rcd.addr);
+ _lto4b(VIRTIO_BLK_SECTOR_SIZE, rcd.length);
+
+ bcopy(&rcd, xs->data, MIN(sizeof(rcd), xs->datalen));
+ vioblk_scsi_done(xs, XS_NOERROR);
+}
+
+void
+vioblk_scsi_capacity16(struct scsi_xfer *xs)
+{
+ struct vioblk_softc *sc = xs->sc_link->adapter_softc;
+ struct scsi_read_cap_data_16 rcd;
+
+ bzero(&rcd, sizeof(rcd));
+
+ _lto8b(sc->sc_capacity - 1, rcd.addr);
+ _lto4b(VIRTIO_BLK_SECTOR_SIZE, rcd.length);
+
+ bcopy(&rcd, xs->data, MIN(sizeof(rcd), xs->datalen));
+ vioblk_scsi_done(xs, XS_NOERROR);
+}
+
+void
+vioblk_scsi_done(struct scsi_xfer *xs, int error)
+{
+ xs->error = error;
+ scsi_done(xs);
+}
+
+int
+vioblk_dev_probe(struct scsi_link *link)
+{
+ KASSERT(link->lun == 0);
+ if (link->target == 0)
+ return (0);
+ return (ENODEV);
+}
+
+void
+vioblk_dev_free(struct scsi_link *link)
+{
+ printf("%s\n", __func__);
+}
+
+int
+vioblk_alloc_reqs(struct vioblk_softc *sc, int qsize)
+{
+ int allocsize, r, rsegs, i;
+ void *vaddr;
+
+ allocsize = sizeof(struct virtio_blk_req) * qsize;
+ r = bus_dmamem_alloc(sc->sc_virtio->sc_dmat, allocsize, 0, 0,
+ &sc->sc_reqs_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
+ if (r != 0) {
+ printf("DMA memory allocation failed, size %d, error %d\n",
+ allocsize, r);
+ goto err_none;
+ }
+ r = bus_dmamem_map(sc->sc_virtio->sc_dmat, &sc->sc_reqs_segs[0], 1,
+ allocsize, (caddr_t *)&vaddr, BUS_DMA_NOWAIT);
+ if (r != 0) {
+ printf("DMA memory map failed, error %d\n", r);
+ goto err_dmamem_alloc;
+ }
+ sc->sc_reqs = vaddr;
+ memset(vaddr, 0, allocsize);
+ for (i = 0; i < qsize; i++) {
+ struct virtio_blk_req *vr = &sc->sc_reqs[i];
+ vr->vr_len = VIOBLK_DONE;
+		/*
+		 * vr_hdr and vr_status sit at the start of the structure,
+		 * so one map of offsetof(..., vr_xs) bytes covers exactly
+		 * the device-visible prefix of each request.
+		 */
+		r = bus_dmamap_create(sc->sc_virtio->sc_dmat,
+			offsetof(struct virtio_blk_req, vr_xs), 1,
+			offsetof(struct virtio_blk_req, vr_xs), 0,
+			BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &vr->vr_cmdsts);
+ if (r != 0) {
+ printf("cmd dmamap creation failed, err %d\n", r);
+ goto err_reqs;
+ }
+ r = bus_dmamap_load(sc->sc_virtio->sc_dmat, vr->vr_cmdsts,
+ &vr->vr_hdr, offsetof(struct virtio_blk_req, vr_xs), NULL,
+ BUS_DMA_NOWAIT);
+ if (r != 0) {
+ printf("command dmamap load failed, err %d\n", r);
+ goto err_reqs;
+ }
+ r = bus_dmamap_create(sc->sc_virtio->sc_dmat, MAXPHYS,
+ sc->sc_seg_max, MAXPHYS, 0,
+ BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &vr->vr_payload);
+ if (r != 0) {
+ printf("payload dmamap creation failed, err %d\n", r);
+ goto err_reqs;
+ }
+ }
+ return 0;
+
+err_reqs:
+ for (i = 0; i < qsize; i++) {
+ struct virtio_blk_req *vr = &sc->sc_reqs[i];
+ if (vr->vr_cmdsts) {
+ bus_dmamap_destroy(sc->sc_virtio->sc_dmat,
+ vr->vr_cmdsts);
+ vr->vr_cmdsts = 0;
+ }
+ if (vr->vr_payload) {
+ bus_dmamap_destroy(sc->sc_virtio->sc_dmat,
+ vr->vr_payload);
+ vr->vr_payload = 0;
+ }
+ }
+ bus_dmamem_unmap(sc->sc_virtio->sc_dmat, (caddr_t)sc->sc_reqs,
+ allocsize);
+err_dmamem_alloc:
+ bus_dmamem_free(sc->sc_virtio->sc_dmat, &sc->sc_reqs_segs[0], 1);
+err_none:
+ return -1;
+}
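
One subtlety above: vioblk_alloc_reqs() sizes the command/status dmamap as
offsetof(struct virtio_blk_req, vr_xs), so a single map covers exactly the
device-visible prefix (header plus status byte) of each request. A
self-contained illustration (the structs are re-declared here with GCC's
attribute spelling instead of the kernel's __packed macro):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	struct virtio_blk_req_hdr {
		uint32_t type;
		uint32_t ioprio;
		uint64_t sector;
	} __attribute__((packed));

	struct virtio_blk_req {
		struct virtio_blk_req_hdr vr_hdr;
		uint8_t vr_status;
		void *vr_xs;		/* first host-only field */
	};

	int
	main(void)
	{
		/* 16-byte header + 1 status byte + alignment padding */
		printf("device-visible prefix: %zu bytes\n",
		    offsetof(struct virtio_blk_req, vr_xs));
		return 0;
	}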
diff --git a/sys/dev/pci/vioblkreg.h b/sys/dev/pci/vioblkreg.h
new file mode 100644
index 00000000000..81d90064895
--- /dev/null
+++ b/sys/dev/pci/vioblkreg.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2012 Stefan Fritsch.
+ * Copyright (c) 2010 Minoura Makoto.
+ * Copyright (c) 1998, 2001 Manuel Bouyer.
+ * All rights reserved.
+ *
+ * This code is based in part on the NetBSD ld_virtio driver and the
+ * OpenBSD wd driver.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Configuration registers */
+#define VIRTIO_BLK_CONFIG_CAPACITY 0 /* 64bit */
+#define VIRTIO_BLK_CONFIG_SIZE_MAX 8 /* 32bit */
+#define VIRTIO_BLK_CONFIG_SEG_MAX 12 /* 32bit */
+#define VIRTIO_BLK_CONFIG_GEOMETRY_C 16 /* 16bit */
+#define VIRTIO_BLK_CONFIG_GEOMETRY_H 18 /* 8bit */
+#define VIRTIO_BLK_CONFIG_GEOMETRY_S 19 /* 8bit */
+#define VIRTIO_BLK_CONFIG_BLK_SIZE 20 /* 32bit */
+
+/* Feature bits */
+#define VIRTIO_BLK_F_BARRIER (1<<0)
+#define VIRTIO_BLK_F_SIZE_MAX (1<<1)
+#define VIRTIO_BLK_F_SEG_MAX (1<<2)
+#define VIRTIO_BLK_F_GEOMETRY (1<<4)
+#define VIRTIO_BLK_F_RO (1<<5)
+#define VIRTIO_BLK_F_BLK_SIZE (1<<6)
+#define VIRTIO_BLK_F_SCSI (1<<7)
+#define VIRTIO_BLK_F_FLUSH (1<<9)
+#define VIRTIO_BLK_F_TOPOLOGY (1<<10)
+
+/* Command */
+#define VIRTIO_BLK_T_IN 0
+#define VIRTIO_BLK_T_OUT 1
+#define VIRTIO_BLK_T_SCSI_CMD 2
+#define VIRTIO_BLK_T_SCSI_CMD_OUT 3
+#define VIRTIO_BLK_T_FLUSH 4
+#define VIRTIO_BLK_T_FLUSH_OUT 5
+#define VIRTIO_BLK_T_GET_ID 8 /* from qemu, not yet in the spec */
+#define VIRTIO_BLK_T_BARRIER 0x80000000
+
+/* Status */
+#define VIRTIO_BLK_S_OK 0
+#define VIRTIO_BLK_S_IOERR 1
+
+#define VIRTIO_BLK_ID_BYTES 20 /* length of serial number */
+
+/* Request header structure */
+struct virtio_blk_req_hdr {
+ uint32_t type; /* VIRTIO_BLK_T_* */
+ uint32_t ioprio;
+ uint64_t sector;
+} __packed;
+/* A payload of n*512 bytes and a one-byte status follow the header */
+
+#define VIRTIO_BLK_SECTOR_SIZE 512
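+
+/*
+ * Illustrative request layout (a sketch, not a struct used by the
+ * driver): a complete request travels over the virtqueue in three
+ * parts, usually carried in separate descriptor segments:
+ *
+ * struct virtio_blk_req_hdr hdr; // read by the device
+ * char data[n * 512]; // payload, read or written
+ * uint8_t status; // VIRTIO_BLK_S_*, written by the device
+ */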
diff --git a/sys/dev/pci/virtio.c b/sys/dev/pci/virtio.c
new file mode 100644
index 00000000000..d886b8c8e7c
--- /dev/null
+++ b/sys/dev/pci/virtio.c
@@ -0,0 +1,918 @@
+/* $NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $ */
+
+/*
+ * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
+ * Copyright (c) 2010 Minoura Makoto.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/device.h>
+#include <sys/mutex.h>
+
+#include <dev/pci/pcidevs.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include <dev/pci/virtioreg.h>
+#include <dev/pci/virtiovar.h>
+
+#define MINSEG_INDIRECT 2 /* use indirect if nsegs >= this value */
+
+#if VIRTIO_DEBUG
+#define VIRITO_ASSERT(x) KASSERT(x)
+#else
+#define VIRITO_ASSERT(x)
+#endif
+
+void virtio_init_vq(struct virtio_softc *,
+ struct virtqueue *, int);
+void vq_free_entry(struct virtqueue *, struct vq_entry *);
+void vq_free_entry_locked(struct virtqueue *, struct vq_entry *);
+struct vq_entry *vq_alloc_entry(struct virtqueue *);
+
+struct cfdriver virtio_cd = {
+ NULL, "virtio", DV_DULL
+};
+
+#define virtio_set_status(sc, s) (sc)->sc_ops->set_status(sc, s)
+#define virtio_device_reset(sc) virtio_set_status((sc), 0)
+
+static const char * const virtio_device_name[] = {
+ "Unknown (0)", /* 0 */
+ "Network", /* 1 */
+ "Block", /* 2 */
+ "Console", /* 3 */
+ "Entropy", /* 4 */
+ "Memory Balloon", /* 5 */
+ "IO Memory", /* 6 */
+ "Rpmsg", /* 7 */
+ "SCSI host", /* 8 */
+ "9P Transport", /* 9 */
+ "mac80211 wlan" /* 10 */
+};
+#define NDEVNAMES (sizeof(virtio_device_name)/sizeof(char*))
+
+static const struct virtio_feature_name transport_feature_names[] = {
+ { VIRTIO_F_NOTIFY_ON_EMPTY, "NotifyOnEmpty"},
+ { VIRTIO_F_RING_INDIRECT_DESC, "RingIndirectDesc"},
+ { VIRTIO_F_RING_EVENT_IDX, "RingEventIdx"},
+ { VIRTIO_F_BAD_FEATURE, "BadFeature"},
+ { 0, NULL}
+};
+
+const char *
+virtio_device_string(int id)
+{
+ return id < NDEVNAMES ? virtio_device_name[id] : "Unknown";
+}
+
+void
+virtio_log_features(uint32_t host, uint32_t neg,
+ const struct virtio_feature_name *guest_feature_names)
+{
+ const struct virtio_feature_name *namep;
+ int i;
+ char c;
+ uint32_t bit;
+
+ for (i = 0; i < 32; i++) {
+ if (i == 30) {
+ /*
+ * VIRTIO_F_BAD_FEATURE is only used for
+ * checking correct negotiation
+ */
+ continue;
+ }
+ bit = 1 << i;
+ if ((host&bit) == 0)
+ continue;
+ namep = (i < 24) ? guest_feature_names :
+ transport_feature_names;
+ while (namep->bit && namep->bit != bit)
+ namep++;
+ c = (neg&bit) ? '+' : '-';
+ if (namep->name)
+ printf(" %c%s", c, namep->name);
+ else
+ printf(" %cUnknown(%d)", c, i);
+ }
+}
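+
+/*
+ * Example (a sketch; the VIRTIO_FOO_* names are hypothetical): a child
+ * driver passes a zero-terminated feature-name table to
+ * virtio_negotiate_features(), which hands it on to this function:
+ *
+ * static const struct virtio_feature_name foo_feature_names[] = {
+ * { VIRTIO_FOO_F_BAR, "Bar" },
+ * { 0, NULL }
+ * };
+ *
+ * negotiated = virtio_negotiate_features(vsc,
+ * VIRTIO_FOO_F_BAR | VIRTIO_F_RING_EVENT_IDX, foo_feature_names);
+ */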
+
+/*
+ * Reset the device.
+ * To reset the device to a known state, do the following:
+ * virtio_reset(sc); // this will stop the device activity
+ * <dequeue finished requests>; // virtio_dequeue() still can be called
+ * <revoke pending requests in the vqs if any>;
+ * virtio_reinit_start(sc); // dequeue prohibited
+ * newfeatures = virtio_negotiate_features(sc, requestedfeatures);
+ * <some other initialization>;
+ * virtio_reinit_end(sc); // device activated; enqueue allowed
+ * Once attached, features can only be negotiated again after a
+ * virtio_reset().
+ */
+void
+virtio_reset(struct virtio_softc *sc)
+{
+ virtio_device_reset(sc);
+}
+
+void
+virtio_reinit_start(struct virtio_softc *sc)
+{
+ int i;
+
+ virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
+ virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
+ for (i = 0; i < sc->sc_nvqs; i++) {
+ int n;
+ struct virtqueue *vq = &sc->sc_vqs[i];
+ n = virtio_read_queue_size(sc, vq->vq_index);
+ if (n == 0) /* vq disappeared */
+ continue;
+ if (n != vq->vq_num) {
+ panic("%s: virtqueue size changed, vq index %d",
+ sc->sc_dev.dv_xname, vq->vq_index);
+ }
+ virtio_init_vq(sc, vq, 1);
+ virtio_write_queue_address(sc, vq->vq_index,
+ vq->vq_dmamap->dm_segs[0].ds_addr / VIRTIO_PAGE_SIZE);
+ }
+}
+
+void
+virtio_reinit_end(struct virtio_softc *sc)
+{
+ virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
+}
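+
+/*
+ * Example (a sketch; the foo_* names are hypothetical): the sequence
+ * above wrapped in a child driver's config change handler:
+ *
+ * int
+ * foo_config_change(struct virtio_softc *vsc)
+ * {
+ * virtio_reset(vsc);
+ * <dequeue finished and revoke pending requests>;
+ * virtio_reinit_start(vsc);
+ * virtio_negotiate_features(vsc, foo_features, NULL);
+ * <re-read the device config space>;
+ * virtio_reinit_end(vsc);
+ * return 1;
+ * }
+ */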
+
+/*
+ * dmamap sync operations for a virtqueue.
+ */
+static inline void
+vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+{
+ /* availoffset == sizeof(vring_desc)*vq_num */
+ bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
+ ops);
+}
+
+static inline void
+vq_sync_aring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+{
+ bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
+ vq->vq_availoffset,
+ offsetof(struct vring_avail, ring)
+ + vq->vq_num * sizeof(uint16_t),
+ ops);
+}
+
+static inline void
+vq_sync_uring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+{
+ bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
+ vq->vq_usedoffset,
+ offsetof(struct vring_used, ring)
+ + vq->vq_num * sizeof(struct vring_used_elem),
+ ops);
+}
+
+static inline void
+vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
+ int ops)
+{
+ int offset = vq->vq_indirectoffset
+ + sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
+
+ bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
+ offset, sizeof(struct vring_desc) * vq->vq_maxnsegs,
+ ops);
+}
+
+/*
+ * Can be used as sc_intrhand.
+ * Scans the vqs, does bus_dmamap_sync for the rings (not for the
+ * payload) and calls (*vq_done)() if some entries have been consumed.
+ */
+int
+virtio_vq_intr(struct virtio_softc *sc)
+{
+ struct virtqueue *vq;
+ int i, r = 0;
+
+ /* going backwards is better for if_vio */
+ for (i = sc->sc_nvqs - 1; i >= 0; i--) {
+ vq = &sc->sc_vqs[i];
+ if (vq->vq_queued) {
+ vq->vq_queued = 0;
+ vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
+ }
+ vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
+ if (vq->vq_used_idx != vq->vq_used->idx) {
+ if (vq->vq_done)
+ r |= (vq->vq_done)(vq);
+ }
+ }
+
+ return r;
+}
+
+/*
+ * Initialize vq structure.
+ */
+void
+virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq, int reinit)
+{
+ int i, j;
+ int vq_size = vq->vq_num;
+
+ memset(vq->vq_vaddr, 0, vq->vq_bytesize);
+
+ /* build the indirect descriptor chain */
+ if (vq->vq_indirect != NULL) {
+ struct vring_desc *vd;
+
+ for (i = 0; i < vq_size; i++) {
+ vd = vq->vq_indirect;
+ vd += vq->vq_maxnsegs * i;
+ for (j = 0; j < vq->vq_maxnsegs-1; j++)
+ vd[j].next = j + 1;
+ }
+ }
+
+ /* free slot management */
+ SIMPLEQ_INIT(&vq->vq_freelist);
+ for (i = 0; i < vq_size; i++) {
+ SIMPLEQ_INSERT_TAIL(&vq->vq_freelist,
+ &vq->vq_entries[i], qe_list);
+ vq->vq_entries[i].qe_index = i;
+ }
+
+ /* enqueue/dequeue status */
+ vq->vq_avail_idx = 0;
+ vq->vq_avail_signalled = 0xffff;
+ vq->vq_used_idx = 0;
+ vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
+ vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
+ vq->vq_queued = 1;
+}
+
+/*
+ * Allocate/free a vq.
+ */
+int
+virtio_alloc_vq(struct virtio_softc *sc,
+ struct virtqueue *vq, int index, int maxsegsize, int maxnsegs,
+ const char *name)
+{
+ int vq_size, allocsize1, allocsize2, allocsize3, allocsize = 0;
+ int rsegs, r, hdrlen;
+#define VIRTQUEUE_ALIGN(n) (((n)+(VIRTIO_PAGE_SIZE-1))& \
+ ~(VIRTIO_PAGE_SIZE-1))
+
+ memset(vq, 0, sizeof(*vq));
+
+ vq_size = virtio_read_queue_size(sc, index);
+ if (vq_size == 0) {
+ printf("virtqueue does not exist, index %d for %s\n", index, name);
+ goto err;
+ }
+ if (((vq_size - 1) & vq_size) != 0)
+ panic("vq_size not power of two: %d", vq_size);
+
+ hdrlen = (sc->sc_features & VIRTIO_F_RING_EVENT_IDX) ? 3 : 2;
+
+ /* allocsize1: descriptor table + avail ring + pad */
+ allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
+ + sizeof(uint16_t) * (hdrlen + vq_size));
+ /* allocsize2: used ring + pad */
+ allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * hdrlen
+ + sizeof(struct vring_used_elem)*vq_size);
+ /* allocsize3: indirect table */
+ /* XXX: This is rather inefficient. In practice only a fraction of this
+ * XXX: memory will be used.
+ */
+ if (sc->sc_indirect && maxnsegs >= MINSEG_INDIRECT)
+ allocsize3 = sizeof(struct vring_desc) * maxnsegs * vq_size;
+ else
+ allocsize3 = 0;
+ allocsize = allocsize1 + allocsize2 + allocsize3;
+
+ /* alloc and map the memory */
+ r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
+ &vq->vq_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
+ if (r != 0) {
+ printf("virtqueue %d for %s allocation failed, error %d\n",
+ index, name, r);
+ goto err;
+ }
+ r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], 1, allocsize,
+ (caddr_t*)&vq->vq_vaddr, BUS_DMA_NOWAIT);
+ if (r != 0) {
+ printf("virtqueue %d for %s map failed, error %d\n",
+ index, name, r);
+ goto err;
+ }
+ r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
+ BUS_DMA_NOWAIT, &vq->vq_dmamap);
+ if (r != 0) {
+ printf("virtqueue %d for %s dmamap creation failed, error %d\n",
+ index, name, r);
+ goto err;
+ }
+ r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap,
+ vq->vq_vaddr, allocsize, NULL, BUS_DMA_NOWAIT);
+ if (r != 0) {
+ printf("virtqueue %d for %s dmamap load failed, error %d\n",
+ index, name, r);
+ goto err;
+ }
+
+ virtio_write_queue_address(sc, index,
+ vq->vq_dmamap->dm_segs[0].ds_addr / VIRTIO_PAGE_SIZE);
+
+ /* remember addresses and offsets for later use */
+ vq->vq_owner = sc;
+ vq->vq_num = vq_size;
+ vq->vq_mask = vq_size - 1;
+ vq->vq_index = index;
+ vq->vq_desc = vq->vq_vaddr;
+ vq->vq_availoffset = sizeof(struct vring_desc)*vq_size;
+ vq->vq_avail = (struct vring_avail*)(((char*)vq->vq_desc) +
+ vq->vq_availoffset);
+ vq->vq_usedoffset = allocsize1;
+ vq->vq_used = (struct vring_used*)(((char*)vq->vq_desc) +
+ vq->vq_usedoffset);
+ if (allocsize3 > 0) {
+ vq->vq_indirectoffset = allocsize1 + allocsize2;
+ vq->vq_indirect = (void*)(((char*)vq->vq_desc)
+ + vq->vq_indirectoffset);
+ }
+ vq->vq_bytesize = allocsize;
+ vq->vq_maxsegsize = maxsegsize;
+ vq->vq_maxnsegs = maxnsegs;
+
+ /* free slot management */
+ vq->vq_entries = malloc(sizeof(struct vq_entry)*vq_size,
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (vq->vq_entries == NULL) {
+ r = ENOMEM;
+ goto err;
+ }
+
+ virtio_init_vq(sc, vq, 0);
+
+#if VIRTIO_DEBUG
+ printf("\nallocated %d bytes for virtqueue %d for %s, size %d\n",
+ allocsize, index, name, vq_size);
+ if (allocsize3 > 0)
+ printf("using %d bytes (%d entries) indirect descriptors\n",
+ allocsize3, maxnsegs * vq_size);
+#endif
+ return 0;
+
+err:
+ virtio_write_queue_address(sc, index, 0);
+ if (vq->vq_dmamap)
+ bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
+ if (vq->vq_vaddr)
+ bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
+ if (vq->vq_segs[0].ds_addr)
+ bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
+ memset(vq, 0, sizeof(*vq));
+
+ return -1;
+}
+
+int
+virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
+{
+ struct vq_entry *qe;
+ int i = 0;
+
+ /* device must be already deactivated */
+ /* confirm the vq is empty */
+ SIMPLEQ_FOREACH(qe, &vq->vq_freelist, qe_list) {
+ i++;
+ }
+ if (i != vq->vq_num) {
+ printf("%s: freeing non-empty vq, index %d\n",
+ sc->sc_dev.dv_xname, vq->vq_index);
+ return EBUSY;
+ }
+
+ /* tell device that there's no virtqueue any longer */
+ virtio_write_queue_address(sc, vq->vq_index, 0);
+
+ free(vq->vq_entries, M_DEVBUF);
+ bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
+ bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
+ bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
+ bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
+ memset(vq, 0, sizeof(*vq));
+
+ return 0;
+}
+
+/*
+ * Free descriptor management.
+ */
+struct vq_entry *
+vq_alloc_entry(struct virtqueue *vq)
+{
+ struct vq_entry *qe;
+
+ if (SIMPLEQ_EMPTY(&vq->vq_freelist))
+ return NULL;
+ qe = SIMPLEQ_FIRST(&vq->vq_freelist);
+ SIMPLEQ_REMOVE_HEAD(&vq->vq_freelist, qe_list);
+
+ return qe;
+}
+
+void
+vq_free_entry(struct virtqueue *vq, struct vq_entry *qe)
+{
+ SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, qe, qe_list);
+}
+
+void
+vq_free_entry_locked(struct virtqueue *vq, struct vq_entry *qe)
+{
+ SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, qe, qe_list);
+}
+
+/*
+ * Enqueue several dmamaps as a single request.
+ */
+/*
+ * Typical usage:
+ * <queue size> instances of the following are stored in arrays:
+ * - command blocks (in dmamem) should be pre-allocated and mapped
+ * - dmamaps for command blocks should be pre-allocated and loaded
+ * - dmamaps for payload should be pre-allocated
+ * r = virtio_enqueue_prep(sc, vq, &slot); // allocate a slot
+ * if (r) // currently 0 or EAGAIN
+ * return r;
+ * r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
+ * if (r) {
+ * virtio_enqueue_abort(sc, vq, slot);
+ * bus_dmamap_unload(dmat, dmamap_payload[slot]);
+ * return r;
+ * }
+ * r = virtio_enqueue_reserve(sc, vq, slot,
+ * dmamap_payload[slot]->dm_nsegs+1);
+ * // ^ +1 for command
+ * if (r) { // currently 0 or EAGAIN
+ * bus_dmamap_unload(dmat, dmamap_payload[slot]);
+ * return r; // do not call abort()
+ * }
+ * <setup and prepare commands>
+ * bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
+ * bus_dmamap_sync(dmat, dmamap_payload[slot],...);
+ * virtio_enqueue(sc, vq, slot, dmamap_cmd[slot], 0);
+ * virtio_enqueue(sc, vq, slot, dmamap_payload[slot], iswrite);
+ * virtio_enqueue_commit(sc, vq, slot, 1);
+ */
+
+/*
+ * enqueue_prep: allocate a slot number
+ */
+int
+virtio_enqueue_prep(struct virtqueue *vq, int *slotp)
+{
+ struct vq_entry *qe1;
+
+ VIRITO_ASSERT(slotp != NULL);
+
+ qe1 = vq_alloc_entry(vq);
+ if (qe1 == NULL)
+ return EAGAIN;
+ /* next slot is not allocated yet */
+ qe1->qe_next = -1;
+ *slotp = qe1->qe_index;
+
+ return 0;
+}
+
+/*
+ * enqueue_reserve: allocate remaining slots and build the descriptor chain.
+ * Calls virtio_enqueue_abort() on failure.
+ */
+int
+virtio_enqueue_reserve(struct virtqueue *vq, int slot, int nsegs)
+{
+ int indirect;
+ struct vq_entry *qe1 = &vq->vq_entries[slot];
+
+ VIRITO_ASSERT(qe1->qe_next == -1);
+ VIRITO_ASSERT(1 <= nsegs && nsegs <= vq->vq_num);
+
+ if ((vq->vq_indirect != NULL) &&
+ (nsegs >= MINSEG_INDIRECT) &&
+ (nsegs <= vq->vq_maxnsegs))
+ indirect = 1;
+ else
+ indirect = 0;
+ qe1->qe_indirect = indirect;
+
+ if (indirect) {
+ struct vring_desc *vd;
+ int i;
+
+ vd = &vq->vq_desc[qe1->qe_index];
+ vd->addr = vq->vq_dmamap->dm_segs[0].ds_addr
+ + vq->vq_indirectoffset;
+ vd->addr += sizeof(struct vring_desc)
+ * vq->vq_maxnsegs * qe1->qe_index;
+ vd->len = sizeof(struct vring_desc) * nsegs;
+ vd->flags = VRING_DESC_F_INDIRECT;
+
+ vd = vq->vq_indirect;
+ vd += vq->vq_maxnsegs * qe1->qe_index;
+ qe1->qe_desc_base = vd;
+
+ for (i = 0; i < nsegs-1; i++) {
+ vd[i].flags = VRING_DESC_F_NEXT;
+ }
+ vd[i].flags = 0;
+ qe1->qe_next = 0;
+
+ return 0;
+ } else {
+ struct vring_desc *vd;
+ struct vq_entry *qe;
+ int i, s;
+
+ vd = &vq->vq_desc[0];
+ qe1->qe_desc_base = vd;
+ qe1->qe_next = qe1->qe_index;
+ s = slot;
+ for (i = 0; i < nsegs - 1; i++) {
+ qe = vq_alloc_entry(vq);
+ if (qe == NULL) {
+ vd[s].flags = 0;
+ virtio_enqueue_abort(vq, slot);
+ return EAGAIN;
+ }
+ vd[s].flags = VRING_DESC_F_NEXT;
+ vd[s].next = qe->qe_index;
+ s = qe->qe_index;
+ }
+ vd[s].flags = 0;
+
+ return 0;
+ }
+}
+
+/*
+ * enqueue: enqueue a single dmamap.
+ */
+int
+virtio_enqueue(struct virtqueue *vq, int slot, bus_dmamap_t dmamap, int write)
+{
+ struct vq_entry *qe1 = &vq->vq_entries[slot];
+ struct vring_desc *vd = qe1->qe_desc_base;
+ int i;
+ int s = qe1->qe_next;
+
+ VIRITO_ASSERT(s >= 0);
+ VIRITO_ASSERT(dmamap->dm_nsegs > 0);
+ if (dmamap->dm_nsegs > vq->vq_maxnsegs) {
+ for (i = 0; i < dmamap->dm_nsegs; i++) {
+ printf(" %d (%d): %#lx %lu\n", i, write,
+ (u_long)dmamap->dm_segs[i].ds_addr,
+ (u_long)dmamap->dm_segs[i].ds_len);
+ }
+ panic("dmamap->dm_nsegs %d > vq->vq_maxnsegs %d",
+ dmamap->dm_nsegs, vq->vq_maxnsegs);
+ }
+
+ for (i = 0; i < dmamap->dm_nsegs; i++) {
+ vd[s].addr = dmamap->dm_segs[i].ds_addr;
+ vd[s].len = dmamap->dm_segs[i].ds_len;
+ if (!write)
+ vd[s].flags |= VRING_DESC_F_WRITE;
+ s = vd[s].next;
+ }
+ qe1->qe_next = s;
+
+ return 0;
+}
+
+int
+virtio_enqueue_p(struct virtqueue *vq, int slot, bus_dmamap_t dmamap,
+ bus_addr_t start, bus_size_t len, int write)
+{
+ struct vq_entry *qe1 = &vq->vq_entries[slot];
+ struct vring_desc *vd = qe1->qe_desc_base;
+ int s = qe1->qe_next;
+
+ VIRITO_ASSERT(s >= 0);
+ /* XXX todo: handle more segments */
+ VIRITO_ASSERT(dmamap->dm_nsegs == 1);
+ VIRITO_ASSERT((dmamap->dm_segs[0].ds_len > start) &&
+ (dmamap->dm_segs[0].ds_len >= start + len));
+
+ vd[s].addr = dmamap->dm_segs[0].ds_addr + start;
+ vd[s].len = len;
+ if (!write)
+ vd[s].flags |= VRING_DESC_F_WRITE;
+ qe1->qe_next = vd[s].next;
+
+ return 0;
+}
+
+static void
+publish_avail_idx(struct virtio_softc *sc, struct virtqueue *vq)
+{
+ vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
+ vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
+ vq->vq_avail->idx = vq->vq_avail_idx;
+ vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
+ vq->vq_queued = 1;
+ vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
+}
+
+/*
+ * enqueue_commit: add it to the aring.
+ */
+int
+virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq,
+ int slot, int notifynow)
+{
+ struct vq_entry *qe1;
+
+ if (slot < 0)
+ goto notify;
+ vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
+ qe1 = &vq->vq_entries[slot];
+ if (qe1->qe_indirect)
+ vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
+ vq->vq_avail->ring[(vq->vq_avail_idx++) & vq->vq_mask] = slot;
+
+notify:
+ if (notifynow) {
+ if (vq->vq_owner->sc_features & VIRTIO_F_RING_EVENT_IDX) {
+ uint16_t o = vq->vq_avail_signalled;
+ uint16_t n = vq->vq_avail_idx;
+ uint16_t t = VQ_AVAIL_EVENT(vq) + 1;
+ publish_avail_idx(sc, vq);
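+ /*
+ * Kick only if the event index t was passed on the way
+ * from the previously signalled index o to the new
+ * index n, taking 16-bit wrap-around into account.
+ */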
+ if ((o < n && o < t && t <= n)
+ || (o > n && (o < t || t <= n))) {
+ sc->sc_ops->kick(sc, vq->vq_index);
+ vq->vq_avail_signalled = n;
+ }
+ } else {
+ publish_avail_idx(sc, vq);
+ if (!(vq->vq_used->flags & VRING_USED_F_NO_NOTIFY))
+ sc->sc_ops->kick(sc, vq->vq_index);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * enqueue_abort: rollback.
+ */
+int
+virtio_enqueue_abort(struct virtqueue *vq, int slot)
+{
+ struct vq_entry *qe = &vq->vq_entries[slot];
+ struct vring_desc *vd;
+ int s;
+
+ if (qe->qe_next < 0) {
+ vq_free_entry(vq, qe);
+ return 0;
+ }
+
+ s = slot;
+ vd = &vq->vq_desc[0];
+ while (vd[s].flags & VRING_DESC_F_NEXT) {
+ s = vd[s].next;
+ vq_free_entry_locked(vq, qe);
+ qe = &vq->vq_entries[s];
+ }
+ vq_free_entry_locked(vq, qe);
+ return 0;
+}
+
+/*
+ * Dequeue a request.
+ */
+/*
+ * dequeue: dequeue a request from uring; dmamap_sync for uring is
+ * already done in the interrupt handler.
+ */
+int
+virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
+ int *slotp, int *lenp)
+{
+ uint16_t slot, usedidx;
+ struct vq_entry *qe;
+
+ if (vq->vq_used_idx == vq->vq_used->idx)
+ return ENOENT;
+ usedidx = vq->vq_used_idx++;
+ usedidx &= vq->vq_mask;
+ slot = vq->vq_used->ring[usedidx].id;
+ qe = &vq->vq_entries[slot];
+
+ if (qe->qe_indirect)
+ vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);
+
+ if (slotp)
+ *slotp = slot;
+ if (lenp)
+ *lenp = vq->vq_used->ring[usedidx].len;
+
+ return 0;
+}
+
+/*
+ * dequeue_commit: complete dequeue; the slot is recycled for future use.
+ * If you forget to call this, the slot will be leaked.
+ */
+int
+virtio_dequeue_commit(struct virtqueue *vq, int slot)
+{
+ struct vq_entry *qe = &vq->vq_entries[slot];
+ struct vring_desc *vd = &vq->vq_desc[0];
+ int s = slot;
+
+ while (vd[s].flags & VRING_DESC_F_NEXT) {
+ s = vd[s].next;
+ vq_free_entry_locked(vq, qe);
+ qe = &vq->vq_entries[s];
+ }
+ vq_free_entry_locked(vq, qe);
+
+ return 0;
+}
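+
+/*
+ * Example (a sketch; the foo_* names are hypothetical): a typical
+ * vq_done handler drains the used ring and recycles the slots:
+ *
+ * int
+ * foo_vq_done(struct virtqueue *vq)
+ * {
+ * struct virtio_softc *vsc = vq->vq_owner;
+ * int slot, len, r = 0;
+ *
+ * while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
+ * <sync the dmamaps and complete the request in slot>;
+ * virtio_dequeue_commit(vq, slot);
+ * r = 1;
+ * }
+ * return r;
+ * }
+ */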
+
+/*
+ * Increase the event index in order to delay interrupts.
+ * Returns 0 on success; returns 1 if the used ring has already advanced
+ * too far, and the caller must process the queue again (otherwise, no
+ * more interrupts will happen).
+ */
+int
+virtio_postpone_intr(struct virtqueue *vq, uint16_t nslots)
+{
+ uint16_t idx;
+
+ idx = vq->vq_used_idx + nslots;
+
+ /* set the new event index: avail_ring->used_event = idx */
+ VQ_USED_EVENT(vq) = idx;
+
+ vq_sync_aring(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
+ vq->vq_queued++;
+
+ if (nslots < virtio_nused(vq))
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Postpone interrupt until 3/4 of the available descriptors have been
+ * consumed.
+ */
+int
+virtio_postpone_intr_smart(struct virtqueue *vq)
+{
+ uint16_t nslots;
+
+ nslots = (uint16_t)(vq->vq_avail->idx - vq->vq_used_idx) * 3 / 4;
+
+ return virtio_postpone_intr(vq, nslots);
+}
+
+/*
+ * Postpone interrupt until all of the available descriptors have been
+ * consumed.
+ */
+int
+virtio_postpone_intr_far(struct virtqueue *vq)
+{
+ uint16_t nslots;
+
+ nslots = (uint16_t)(vq->vq_avail->idx - vq->vq_used_idx);
+
+ return virtio_postpone_intr(vq, nslots);
+}
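+
+/*
+ * Example (a sketch): since virtio_postpone_intr() returns 1 when the
+ * used ring has already advanced past the new event index, a done
+ * handler can loop until postponing succeeds:
+ *
+ * do {
+ * <dequeue and process all used slots>;
+ * } while (virtio_postpone_intr_smart(vq));
+ */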
+
+
+/*
+ * Start/stop vq interrupt. No guarantee.
+ */
+void
+virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
+{
+ if ((sc->sc_features & VIRTIO_F_RING_EVENT_IDX)) {
+ /*
+ * No way to disable the interrupt completely with
+ * RingEventIdx. Instead advance used_event by half
+ * the possible value. This won't happen soon and
+ * is far enough in the past to not trigger a spurious
+ * interrupt.
+ */
+ VQ_USED_EVENT(vq) = vq->vq_used_idx + 0x8000;
+ } else {
+ vq->vq_avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
+ }
+ vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
+ vq->vq_queued++;
+}
+
+int
+virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
+{
+ /*
+ * If event index feature is negotiated, enabling
+ * interrupts is done through setting the latest
+ * consumed index in the used_event field
+ */
+ if (sc->sc_features & VIRTIO_F_RING_EVENT_IDX)
+ VQ_USED_EVENT(vq) = vq->vq_used_idx;
+ else
+ vq->vq_avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+
+ vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
+ vq->vq_queued++;
+
+ if (vq->vq_used_idx != vq->vq_used->idx)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Returns the number of slots in the used ring that the device has
+ * filled but that have not yet been dequeued.
+ */
+int
+virtio_nused(struct virtqueue *vq)
+{
+ uint16_t n;
+
+ n = (uint16_t)(vq->vq_used->idx - vq->vq_used_idx);
+ VIRITO_ASSERT(n <= vq->vq_num);
+
+ return n;
+}
+
+#if VIRTIO_DEBUG
+void
+virtio_vq_dump(struct virtqueue *vq)
+{
+ /* Common fields */
+ printf(" + vq num: %d\n", vq->vq_num);
+ printf(" + vq mask: 0x%X\n", vq->vq_mask);
+ printf(" + vq index: %d\n", vq->vq_index);
+ printf(" + vq used idx: %d\n", vq->vq_used_idx);
+ printf(" + vq avail idx: %d\n", vq->vq_avail_idx);
+ printf(" + vq queued: %d\n", vq->vq_queued);
+ /* Avail ring fields */
+ printf(" + avail flags: 0x%X\n", vq->vq_avail->flags);
+ printf(" + avail idx: %d\n", vq->vq_avail->idx);
+ printf(" + avail event: %d\n", VQ_AVAIL_EVENT(vq));
+ /* Used ring fields */
+ printf(" + used flags: 0x%X\n", vq->vq_used->flags);
+ printf(" + used idx: %d\n", vq->vq_used->idx);
+ printf(" + used event: %d\n", VQ_USED_EVENT(vq));
+ printf(" +++++++++++++++++++++++++++\n");
+}
+#endif
diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
new file mode 100644
index 00000000000..fdfb27e76f8
--- /dev/null
+++ b/sys/dev/pci/virtio_pci.c
@@ -0,0 +1,411 @@
+/* $NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $ */
+
+/*
+ * Copyright (c) 2012 Stefan Fritsch.
+ * Copyright (c) 2010 Minoura Makoto.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/device.h>
+#include <sys/mutex.h>
+
+#include <dev/pci/pcidevs.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include <dev/pci/virtioreg.h>
+#include <dev/pci/virtiovar.h>
+
+/*
+ * XXX: Before being used on big endian arches, the access to config registers
+ * XXX: needs to be reviewed/fixed. The non-device specific registers are
+ * XXX: PCI-endian while the device specific registers are native endian.
+ */
+
+#define virtio_set_status(sc, s) virtio_pci_set_status(sc, s)
+#define virtio_device_reset(sc) virtio_set_status((sc), 0)
+
+int virtio_pci_match(struct device *, void *, void *);
+void virtio_pci_attach(struct device *, struct device *, void *);
+int virtio_pci_detach(struct device *, int);
+
+void virtio_pci_kick(struct virtio_softc *, uint16_t);
+uint8_t virtio_pci_read_device_config_1(struct virtio_softc *, int);
+uint16_t virtio_pci_read_device_config_2(struct virtio_softc *, int);
+uint32_t virtio_pci_read_device_config_4(struct virtio_softc *, int);
+uint64_t virtio_pci_read_device_config_8(struct virtio_softc *, int);
+void virtio_pci_write_device_config_1(struct virtio_softc *, int, uint8_t);
+void virtio_pci_write_device_config_2(struct virtio_softc *, int, uint16_t);
+void virtio_pci_write_device_config_4(struct virtio_softc *, int, uint32_t);
+void virtio_pci_write_device_config_8(struct virtio_softc *, int, uint64_t);
+uint16_t virtio_pci_read_queue_size(struct virtio_softc *, uint16_t);
+void virtio_pci_write_queue_address(struct virtio_softc *, uint16_t, uint32_t);
+void virtio_pci_set_status(struct virtio_softc *, int);
+uint32_t virtio_pci_negotiate_features(struct virtio_softc *, uint32_t,
+ const struct virtio_feature_name *);
+int virtio_pci_intr(void *);
+
+struct virtio_pci_softc {
+ struct virtio_softc sc_sc;
+ pci_chipset_tag_t sc_pc;
+
+ bus_space_tag_t sc_iot;
+ bus_space_handle_t sc_ioh;
+ bus_size_t sc_iosize;
+ int sc_config_offset;
+};
+
+struct cfattach virtio_pci_ca = {
+ sizeof(struct virtio_pci_softc),
+ virtio_pci_match,
+ virtio_pci_attach,
+ virtio_pci_detach,
+ NULL
+};
+
+struct virtio_ops virtio_pci_ops = {
+ virtio_pci_kick,
+ virtio_pci_read_device_config_1,
+ virtio_pci_read_device_config_2,
+ virtio_pci_read_device_config_4,
+ virtio_pci_read_device_config_8,
+ virtio_pci_write_device_config_1,
+ virtio_pci_write_device_config_2,
+ virtio_pci_write_device_config_4,
+ virtio_pci_write_device_config_8,
+ virtio_pci_read_queue_size,
+ virtio_pci_write_queue_address,
+ virtio_pci_set_status,
+ virtio_pci_negotiate_features,
+ virtio_pci_intr,
+};
+
+uint16_t
+virtio_pci_read_queue_size(struct virtio_softc *vsc, uint16_t idx)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+ bus_space_write_2(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_QUEUE_SELECT,
+ idx);
+ return bus_space_read_2(sc->sc_iot, sc->sc_ioh,
+ VIRTIO_CONFIG_QUEUE_SIZE);
+}
+
+void
+virtio_pci_write_queue_address(struct virtio_softc *vsc, uint16_t idx,
+ uint32_t addr)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+ bus_space_write_2(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_QUEUE_SELECT,
+ idx);
+ bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_QUEUE_ADDRESS,
+ addr);
+}
+
+void
+virtio_pci_set_status(struct virtio_softc *vsc, int status)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+ int old = 0;
+
+ if (status != 0)
+ old = bus_space_read_1(sc->sc_iot, sc->sc_ioh,
+ VIRTIO_CONFIG_DEVICE_STATUS);
+ bus_space_write_1(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_DEVICE_STATUS,
+ status|old);
+}
+
+int
+virtio_pci_match(struct device *parent, void *match, void *aux)
+{
+ struct pci_attach_args *pa;
+
+ pa = (struct pci_attach_args *)aux;
+ if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_QUMRANET &&
+ PCI_PRODUCT(pa->pa_id) >= 0x1000 &&
+ PCI_PRODUCT(pa->pa_id) <= 0x103f &&
+ PCI_REVISION(pa->pa_class) == 0)
+ return 1;
+ return 0;
+}
+
+void
+virtio_pci_attach(struct device *parent, struct device *self, void *aux)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self;
+ struct virtio_softc *vsc = &sc->sc_sc;
+ struct pci_attach_args *pa = (struct pci_attach_args *)aux;
+ pci_chipset_tag_t pc = pa->pa_pc;
+ pcitag_t tag = pa->pa_tag;
+ int revision;
+ pcireg_t id;
+ char const *intrstr;
+ pci_intr_handle_t ih;
+
+ revision = PCI_REVISION(pa->pa_class);
+ if (revision != 0) {
+ printf("unknown revision 0x%02x; giving up\n", revision);
+ return;
+ }
+
+ /* the subsystem ID identifies the virtio device type */
+ id = PCI_PRODUCT(pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG));
+ printf(": Virtio %s Device", virtio_device_string(id));
+
+#ifdef notyet
+ if (pci_get_capability(pc, tag, PCI_CAP_MSIX, NULL, NULL))
+ printf(", msix capable");
+#endif
+ printf("\n");
+
+ vsc->sc_ops = &virtio_pci_ops;
+ sc->sc_pc = pc;
+ vsc->sc_dmat = pa->pa_dmat;
+ sc->sc_config_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
+
+ if (pci_mapreg_map(pa, PCI_MAPREG_START, PCI_MAPREG_TYPE_IO, 0,
+ &sc->sc_iot, &sc->sc_ioh, NULL, &sc->sc_iosize, 0)) {
+ printf("can't map i/o space\n");
+ return;
+ }
+
+ virtio_device_reset(vsc);
+ virtio_pci_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
+ virtio_pci_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
+
+ /* XXX: use softc as aux... */
+ vsc->sc_childdevid = id;
+ vsc->sc_child = NULL;
+ config_found(self, sc, NULL);
+ if (vsc->sc_child == NULL) {
+ printf("no matching child driver; not configured\n");
+ goto fail_1;
+ }
+ if (vsc->sc_child == VIRTIO_CHILD_ERROR) {
+ printf("virtio configuration failed\n");
+ goto fail_1;
+ }
+
+ if (pci_intr_map(pa, &ih)) {
+ printf("couldn't map interrupt\n");
+ goto fail_2;
+ }
+ intrstr = pci_intr_string(pc, ih);
+ vsc->sc_ih = pci_intr_establish(pc, ih, vsc->sc_ipl, virtio_pci_intr,
+ sc, vsc->sc_dev.dv_xname);
+ if (vsc->sc_ih == NULL) {
+ printf("couldn't establish interrupt");
+ if (intrstr != NULL)
+ printf(" at %s", intrstr);
+ printf("\n");
+ goto fail_2;
+ }
+ printf("%s: interrupting at %s\n", vsc->sc_dev.dv_xname, intrstr);
+
+ virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
+ return;
+
+fail_2:
+ config_detach(vsc->sc_child, 0);
+fail_1:
+ /* no pci_mapreg_unmap() or pci_intr_unmap() */
+ virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
+}
+
+int
+virtio_pci_detach(struct device *self, int flags)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self;
+ struct virtio_softc *vsc = &sc->sc_sc;
+ int r;
+
+ if (vsc->sc_child != 0 && vsc->sc_child != VIRTIO_CHILD_ERROR) {
+ r = config_detach(vsc->sc_child, flags);
+ if (r)
+ return r;
+ }
+ KASSERT(vsc->sc_child == 0 || vsc->sc_child == VIRTIO_CHILD_ERROR);
+ KASSERT(vsc->sc_vqs == 0);
+ pci_intr_disestablish(sc->sc_pc, vsc->sc_ih);
+ vsc->sc_ih = 0;
+ if (sc->sc_iosize)
+ bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize);
+ sc->sc_iosize = 0;
+
+ return 0;
+}
+
+/*
+ * Feature negotiation.
+ * Prints available / negotiated features if guest_feature_names != NULL and
+ * VIRTIO_DEBUG is 1
+ */
+uint32_t
+virtio_pci_negotiate_features(struct virtio_softc *vsc, uint32_t guest_features,
+ const struct virtio_feature_name *guest_feature_names)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+ uint32_t host, neg;
+
+ /*
+ * Indirect descriptors can be switched off by setting bit 1 in the
+ * driver flags, see config(8).
+ */
+ if (!(vsc->sc_dev.dv_cfdata->cf_flags & 1) &&
+ !(vsc->sc_child->dv_cfdata->cf_flags & 1)) {
+ guest_features |= VIRTIO_F_RING_INDIRECT_DESC;
+ } else {
+ printf("RingIndirectDesc disabled by UKC\n");
+ }
+ host = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
+ VIRTIO_CONFIG_DEVICE_FEATURES);
+ neg = host & guest_features;
+#if VIRTIO_DEBUG
+ if (guest_feature_names)
+ virtio_log_features(host, neg, guest_feature_names);
+#endif
+ bus_space_write_4(sc->sc_iot, sc->sc_ioh,
+ VIRTIO_CONFIG_GUEST_FEATURES, neg);
+ vsc->sc_features = neg;
+ if (neg & VIRTIO_F_RING_INDIRECT_DESC)
+ vsc->sc_indirect = 1;
+ else
+ vsc->sc_indirect = 0;
+
+ return neg;
+}
+
+/*
+ * Device configuration registers.
+ */
+uint8_t
+virtio_pci_read_device_config_1(struct virtio_softc *vsc, int index)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+ return bus_space_read_1(sc->sc_iot, sc->sc_ioh,
+ sc->sc_config_offset + index);
+}
+
+uint16_t
+virtio_pci_read_device_config_2(struct virtio_softc *vsc, int index)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+ return bus_space_read_2(sc->sc_iot, sc->sc_ioh,
+ sc->sc_config_offset + index);
+}
+
+uint32_t
+virtio_pci_read_device_config_4(struct virtio_softc *vsc, int index)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+ return bus_space_read_4(sc->sc_iot, sc->sc_ioh,
+ sc->sc_config_offset + index);
+}
+
+uint64_t
+virtio_pci_read_device_config_8(struct virtio_softc *vsc, int index)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+ uint64_t r;
+
+ r = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
+ sc->sc_config_offset + index + sizeof(uint32_t));
+ r <<= 32;
+ r += bus_space_read_4(sc->sc_iot, sc->sc_ioh,
+ sc->sc_config_offset + index);
+ return r;
+}
+
+void
+virtio_pci_write_device_config_1(struct virtio_softc *vsc,
+ int index, uint8_t value)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+ bus_space_write_1(sc->sc_iot, sc->sc_ioh,
+ sc->sc_config_offset + index, value);
+}
+
+void
+virtio_pci_write_device_config_2(struct virtio_softc *vsc,
+ int index, uint16_t value)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+ bus_space_write_2(sc->sc_iot, sc->sc_ioh,
+ sc->sc_config_offset + index, value);
+}
+
+void
+virtio_pci_write_device_config_4(struct virtio_softc *vsc,
+ int index, uint32_t value)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+ bus_space_write_4(sc->sc_iot, sc->sc_ioh,
+ sc->sc_config_offset + index, value);
+}
+
+void
+virtio_pci_write_device_config_8(struct virtio_softc *vsc,
+ int index, uint64_t value)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+ bus_space_write_4(sc->sc_iot, sc->sc_ioh,
+ sc->sc_config_offset + index,
+ value & 0xffffffff);
+ bus_space_write_4(sc->sc_iot, sc->sc_ioh,
+ sc->sc_config_offset + index + sizeof(uint32_t),
+ value >> 32);
+}
+
+/*
+ * Interrupt handler.
+ */
+int
+virtio_pci_intr(void *arg)
+{
+ struct virtio_pci_softc *sc = arg;
+ struct virtio_softc *vsc = &sc->sc_sc;
+ int isr, r = 0;
+
+ /* check and ack the interrupt */
+ isr = bus_space_read_1(sc->sc_iot, sc->sc_ioh,
+ VIRTIO_CONFIG_ISR_STATUS);
+ if (isr == 0)
+ return 0;
+ if ((isr & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) &&
+ (vsc->sc_config_change != NULL))
+ r = (vsc->sc_config_change)(vsc);
+ if (vsc->sc_intrhand != NULL)
+ r |= (vsc->sc_intrhand)(vsc);
+
+ return r;
+}
+
+void
+virtio_pci_kick(struct virtio_softc *vsc, uint16_t idx)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+ bus_space_write_2(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_QUEUE_NOTIFY,
+ idx);
+}
diff --git a/sys/dev/pci/virtioreg.h b/sys/dev/pci/virtioreg.h
new file mode 100644
index 00000000000..89a47ff095a
--- /dev/null
+++ b/sys/dev/pci/virtioreg.h
@@ -0,0 +1,193 @@
+/* $NetBSD: virtioreg.h,v 1.1 2011/10/30 12:12:21 hannken Exp $ */
+
+/*
+ * Copyright (c) 2012 Stefan Fritsch.
+ * Copyright (c) 2010 Minoura Makoto.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Part of the file derived from `Virtio PCI Card Specification v0.8.6 DRAFT'
+ * Appendix A.
+ */
+/* An interface for efficient virtio implementation.
+ *
+ * This header is BSD licensed so anyone can use the definitions
+ * to implement compatible drivers/servers.
+ *
+ * Copyright 2007, 2009, IBM Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#ifndef _DEV_PCI_VIRTIOREG_H_
+#define _DEV_PCI_VIRTIOREG_H_
+
+#include <sys/types.h>
+
+/* Virtio product id (subsystem) */
+#define PCI_PRODUCT_VIRTIO_NETWORK 1
+#define PCI_PRODUCT_VIRTIO_BLOCK 2
+#define PCI_PRODUCT_VIRTIO_CONSOLE 3
+#define PCI_PRODUCT_VIRTIO_ENTROPY 4
+#define PCI_PRODUCT_VIRTIO_BALLOON 5
+#define PCI_PRODUCT_VIRTIO_IOMEM 6
+#define PCI_PRODUCT_VIRTIO_RPMSG 7
+#define PCI_PRODUCT_VIRTIO_SCSI 8
+#define PCI_PRODUCT_VIRTIO_9P 9
+#define PCI_PRODUCT_VIRTIO_MAC80211 10
+
+/* Virtio header */
+#define VIRTIO_CONFIG_DEVICE_FEATURES 0 /* 32bit */
+#define VIRTIO_CONFIG_GUEST_FEATURES 4 /* 32bit */
+#define VIRTIO_F_NOTIFY_ON_EMPTY (1<<24)
+#define VIRTIO_F_RING_INDIRECT_DESC (1<<28)
+#define VIRTIO_F_RING_EVENT_IDX (1<<29)
+#define VIRTIO_F_BAD_FEATURE (1<<30)
+#define VIRTIO_CONFIG_QUEUE_ADDRESS 8 /* 32bit */
+#define VIRTIO_CONFIG_QUEUE_SIZE 12 /* 16bit */
+#define VIRTIO_CONFIG_QUEUE_SELECT 14 /* 16bit */
+#define VIRTIO_CONFIG_QUEUE_NOTIFY 16 /* 16bit */
+#define VIRTIO_CONFIG_DEVICE_STATUS 18 /* 8bit */
+#define VIRTIO_CONFIG_DEVICE_STATUS_RESET 0
+#define VIRTIO_CONFIG_DEVICE_STATUS_ACK 1
+#define VIRTIO_CONFIG_DEVICE_STATUS_DRIVER 2
+#define VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK 4
+#define VIRTIO_CONFIG_DEVICE_STATUS_FAILED 128
+#define VIRTIO_CONFIG_ISR_STATUS 19 /* 8bit */
+#define VIRTIO_CONFIG_ISR_CONFIG_CHANGE 2
+#define VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI 20
+/* Only if MSIX is enabled: */
+#define VIRTIO_MSI_CONFIG_VECTOR 20 /* 16bit, optional */
+#define VIRTIO_MSI_QUEUE_VECTOR 22 /* 16bit, optional */
+#define VIRTIO_CONFIG_DEVICE_CONFIG_MSI 24
+
+/* Virtqueue */
+/* This marks a buffer as continuing via the next field. */
+#define VRING_DESC_F_NEXT 1
+/* This marks a buffer as write-only (otherwise read-only). */
+#define VRING_DESC_F_WRITE 2
+/* This means the buffer contains a list of buffer descriptors. */
+#define VRING_DESC_F_INDIRECT 4
+
+/* The Host uses this in used->flags to advise the Guest: don't kick me
+ * when you add a buffer. It's unreliable, so it's simply an
+ * optimization. Guest will still kick if it's out of buffers. */
+#define VRING_USED_F_NO_NOTIFY 1
+/* The Guest uses this in avail->flags to advise the Host: don't
+ * interrupt me when you consume a buffer. It's unreliable, so it's
+ * simply an optimization. */
+#define VRING_AVAIL_F_NO_INTERRUPT 1
+
+
+/* The standard layout for the ring is a contiguous chunk of memory which
+ * looks like this. We assume num is a power of 2.
+ *
+ * struct vring {
+ * // The actual descriptors (16 bytes each)
+ * struct vring_desc desc[num];
+ *
+ * // A ring of available descriptor heads with free-running index.
+ * __u16 avail_flags;
+ * __u16 avail_idx;
+ * __u16 available[num];
+ * __u16 used_event_idx;
+ *
+ * // Padding to the next align boundary.
+ * char pad[];
+ *
+ * // A ring of used descriptor heads with free-running index.
+ * __u16 used_flags;
+ * __u16 used_idx;
+ * struct vring_used_elem used[num];
+ * __u16 avail_event_idx;
+ * };
+ * Note: for virtio PCI, align is 4096.
+ */
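+
+/* For illustration (not used verbatim by the driver), the total size of
+ * such a ring with the event index fields included is
+ *
+ * align(sizeof(struct vring_desc) * num
+ * + sizeof(uint16_t) * (3 + num))
+ * + align(sizeof(uint16_t) * 3
+ * + sizeof(struct vring_used_elem) * num)
+ *
+ * which is what virtio_alloc_vq() computes as allocsize1 + allocsize2.
+ */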
+
+/* Virtio ring descriptors: 16 bytes.
+ * These can chain together via "next". */
+struct vring_desc {
+ /* Address (guest-physical). */
+ uint64_t addr;
+ /* Length. */
+ uint32_t len;
+ /* The flags as indicated above. */
+ uint16_t flags;
+ /* We chain unused descriptors via this, too */
+ uint16_t next;
+} __packed;
+
+struct vring_avail {
+ uint16_t flags;
+ uint16_t idx;
+ uint16_t ring[0];
+} __packed;
+
+/* u32 is used here for ids for padding reasons. */
+struct vring_used_elem {
+ /* Index of start of used descriptor chain. */
+ uint32_t id;
+ /* Total length of the descriptor chain which was written to. */
+ uint32_t len;
+} __packed;
+
+struct vring_used {
+ uint16_t flags;
+ uint16_t idx;
+ struct vring_used_elem ring[0];
+} __packed;
+
+/*
+ * We publish the used event index at the end of the available ring, and vice
+ * versa. They are at the end for backwards compatibility.
+ */
+#define VQ_USED_EVENT(vq) (*(uint16_t*)(&(vq)->vq_avail->ring[(vq)->vq_num]))
+#define VQ_AVAIL_EVENT(vq) (*(uint16_t*)(&(vq)->vq_used->ring[(vq)->vq_num]))
+
+#define VIRTIO_PAGE_SIZE (4096)
+
+#endif /* _DEV_PCI_VIRTIOREG_H_ */
diff --git a/sys/dev/pci/virtiovar.h b/sys/dev/pci/virtiovar.h
new file mode 100644
index 00000000000..1dc8db5e31e
--- /dev/null
+++ b/sys/dev/pci/virtiovar.h
@@ -0,0 +1,229 @@
+/* $NetBSD: virtiovar.h,v 1.1 2011/10/30 12:12:21 hannken Exp $ */
+
+/*
+ * Copyright (c) 2012 Stefan Fritsch.
+ * Copyright (c) 2010 Minoura Makoto.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Part of the file derived from `Virtio PCI Card Specification v0.8.6 DRAFT'
+ * Appendix A.
+ */
+/* An interface for efficient virtio implementation.
+ *
+ * This header is BSD licensed so anyone can use the definitions
+ * to implement compatible drivers/servers.
+ *
+ * Copyright 2007, 2009, IBM Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#ifndef _DEV_PCI_VIRTIOVAR_H_
+#define _DEV_PCI_VIRTIOVAR_H_
+
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/param.h>
+#include <sys/device.h>
+#include <sys/mutex.h>
+#include <machine/bus.h>
+
+#include <dev/pci/virtioreg.h>
+
+#ifndef VIRTIO_DEBUG
+#define VIRTIO_DEBUG 0
+#endif
+
+struct vq_entry {
+ SIMPLEQ_ENTRY(vq_entry) qe_list; /* free list */
+ uint16_t qe_index; /* index in vq_desc array */
+ /* the following fields are used only when it is the `head' entry */
+ int16_t qe_next; /* next enq slot */
+ int qe_indirect; /* 1 if using indirect */
+ struct vring_desc *qe_desc_base;
+};
+
+struct virtqueue {
+ struct virtio_softc *vq_owner;
+ unsigned int vq_num; /* queue size (# of entries) */
+ unsigned int vq_mask; /* vq_num - 1 */
+ int vq_index; /* queue number (0, 1, ...) */
+
+ /* vring pointers (KVA) */
+ struct vring_desc *vq_desc;
+ struct vring_avail *vq_avail;
+ struct vring_used *vq_used;
+ void *vq_indirect;
+
+ /* virtqueue allocation info */
+ void *vq_vaddr;
+ int vq_availoffset;
+ int vq_usedoffset;
+ int vq_indirectoffset;
+ bus_dma_segment_t vq_segs[1];
+ unsigned int vq_bytesize;
+ bus_dmamap_t vq_dmamap;
+
+ int vq_maxsegsize;
+ int vq_maxnsegs;
+
+ /* free entry management */
+ struct vq_entry *vq_entries;
+ SIMPLEQ_HEAD(, vq_entry) vq_freelist;
+ struct mutex *vq_freelist_lock;
+
+ /* enqueue/dequeue status */
+ uint16_t vq_avail_idx;
+ uint16_t vq_avail_signalled;
+ uint16_t vq_used_idx;
+ int vq_queued;
+ struct mutex *vq_aring_lock;
+ struct mutex *vq_uring_lock;
+
+ /* interrupt handler */
+ int (*vq_done)(struct virtqueue*);
+};
+
+struct virtio_feature_name {
+ uint32_t bit;
+ const char *name;
+};
+
+struct virtio_ops {
+ void (*kick)(struct virtio_softc *, uint16_t);
+ uint8_t (*read_dev_cfg_1)(struct virtio_softc *, int);
+ uint16_t (*read_dev_cfg_2)(struct virtio_softc *, int);
+ uint32_t (*read_dev_cfg_4)(struct virtio_softc *, int);
+ uint64_t (*read_dev_cfg_8)(struct virtio_softc *, int);
+ void (*write_dev_cfg_1)(struct virtio_softc *, int, uint8_t);
+ void (*write_dev_cfg_2)(struct virtio_softc *, int, uint16_t);
+ void (*write_dev_cfg_4)(struct virtio_softc *, int, uint32_t);
+ void (*write_dev_cfg_8)(struct virtio_softc *, int, uint64_t);
+ uint16_t (*read_queue_size)(struct virtio_softc *, uint16_t);
+ void (*write_queue_addr)(struct virtio_softc *, uint16_t, uint32_t);
+ void (*set_status)(struct virtio_softc *, int);
+ uint32_t (*neg_features)(struct virtio_softc *, uint32_t, const struct virtio_feature_name *);
+ int (*intr)(void *);
+};
+
+#define VIRTIO_CHILD_ERROR ((void*)1)
+
+struct virtio_softc {
+ struct device sc_dev;
+ bus_dma_tag_t sc_dmat; /* set by transport */
+ struct virtio_ops *sc_ops; /* set by transport */
+
+ int sc_ipl; /* set by child */
+ void *sc_ih; /* set by transport */
+
+ uint32_t sc_features;
+ int sc_indirect;
+
+ int sc_nvqs; /* set by child */
+ struct virtqueue *sc_vqs; /* set by child */
+
+ int sc_childdevid; /* set by transport */
+ struct device *sc_child; /* set by child,
+ * VIRTIO_CHILD_ERROR on error
+ */
+ int (*sc_config_change)(struct virtio_softc*);
+ /* set by child */
+ int (*sc_intrhand)(struct virtio_softc*);
+ /* set by child */
+};
+
+/* public interface */
+#define virtio_read_device_config_1(sc, o) (sc)->sc_ops->read_dev_cfg_1(sc, o)
+#define virtio_read_device_config_2(sc, o) (sc)->sc_ops->read_dev_cfg_2(sc, o)
+#define virtio_read_device_config_4(sc, o) (sc)->sc_ops->read_dev_cfg_4(sc, o)
+#define virtio_read_device_config_8(sc, o) (sc)->sc_ops->read_dev_cfg_8(sc, o)
+#define virtio_write_device_config_1(sc, o, v) (sc)->sc_ops->write_dev_cfg_1(sc, o, v)
+#define virtio_write_device_config_2(sc, o, v) (sc)->sc_ops->write_dev_cfg_2(sc, o, v)
+#define virtio_write_device_config_4(sc, o, v) (sc)->sc_ops->write_dev_cfg_4(sc, o, v)
+#define virtio_write_device_config_8(sc, o, v) (sc)->sc_ops->write_dev_cfg_8(sc, o, v)
+#define virtio_read_queue_size(sc, i) (sc)->sc_ops->read_queue_size(sc, i)
+#define virtio_write_queue_address(sc, i, v) (sc)->sc_ops->write_queue_addr(sc, i, v)
+#define virtio_negotiate_features(sc, f, n) (sc)->sc_ops->neg_features(sc, f, n)
+
+int virtio_alloc_vq(struct virtio_softc*, struct virtqueue*, int, int, int,
+ const char*);
+int virtio_free_vq(struct virtio_softc*, struct virtqueue*);
+void virtio_reset(struct virtio_softc *);
+void virtio_reinit_start(struct virtio_softc *);
+void virtio_reinit_end(struct virtio_softc *);
+
+int virtio_enqueue_prep(struct virtqueue*, int*);
+int virtio_enqueue_reserve(struct virtqueue*, int, int);
+int virtio_enqueue(struct virtqueue*, int, bus_dmamap_t, int);
+int virtio_enqueue_p(struct virtqueue*, int, bus_dmamap_t, bus_addr_t,
+ bus_size_t, int);
+int virtio_enqueue_commit(struct virtio_softc*, struct virtqueue*, int, int);
+#define virtio_notify(sc,vq) virtio_enqueue_commit(sc, vq, -1, 1)
+
+int virtio_enqueue_abort(struct virtqueue*, int);
+
+int virtio_dequeue(struct virtio_softc*, struct virtqueue*, int *, int *);
+int virtio_dequeue_commit(struct virtqueue*, int);
+
+int virtio_intr(void *arg);
+int virtio_vq_intr(struct virtio_softc *);
+void virtio_stop_vq_intr(struct virtio_softc *, struct virtqueue *);
+int virtio_start_vq_intr(struct virtio_softc *, struct virtqueue *);
+
+const char *virtio_device_string(int);
+void virtio_log_features(uint32_t, uint32_t, const struct virtio_feature_name *);
+
+#if VIRTIO_DEBUG
+void virtio_vq_dump(struct virtqueue *vq);
+#endif
+int virtio_nused(struct virtqueue *vq);
+int virtio_postpone_intr(struct virtqueue *vq, uint16_t nslots);
+int virtio_postpone_intr_smart(struct virtqueue *vq);
+int virtio_postpone_intr_far(struct virtqueue *vq);
+
+#endif /* _DEV_PCI_VIRTIOVAR_H_ */