-rw-r--r--   share/man/man4/Makefile      |    5
-rw-r--r--   share/man/man4/vio.4         |   50
-rw-r--r--   share/man/man4/vioblk.4      |   49
-rw-r--r--   share/man/man4/virtio.4      |   49
-rw-r--r--   sys/arch/amd64/conf/GENERIC  |    7
-rw-r--r--   sys/arch/i386/conf/GENERIC   |    7
-rw-r--r--   sys/dev/pci/files.pci        |   17
-rw-r--r--   sys/dev/pci/if_vio.c         | 1357
-rw-r--r--   sys/dev/pci/vioblk.c         |  619
-rw-r--r--   sys/dev/pci/vioblkreg.h      |   75
-rw-r--r--   sys/dev/pci/virtio.c         |  918
-rw-r--r--   sys/dev/pci/virtio_pci.c     |  411
-rw-r--r--   sys/dev/pci/virtioreg.h      |  193
-rw-r--r--   sys/dev/pci/virtiovar.h      |  229
14 files changed, 3981 insertions, 5 deletions
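The commit splits virtio support into a generic transport driver, virtio(4) at pci, and the device drivers vio(4) and vioblk(4) that attach to it. A pattern that recurs in both drivers below is feature negotiation: the driver passes a bitmask of wanted features plus a table of bit names to virtio_negotiate_features(), which intersects the request with what the host offers and prints the accepted names during attach. A minimal standalone illustration of that mechanism (an editor's sketch, not code from the commit; the bit values are the VIRTIO_NET_F_* definitions from if_vio.c below):

#include <stdio.h>
#include <stdint.h>

/* mirrors the { bit, name } table shape used by virtio_net_feature_names */
struct virtio_feature_name {
	uint32_t	 bit;
	const char	*name;
};

/* a subset of the VIRTIO_NET_F_* bits defined in if_vio.c */
static const struct virtio_feature_name names[] = {
	{ 1 << 5,	"MAC" },
	{ 1 << 16,	"Status" },
	{ 1 << 17,	"CtrlVQ" },
	{ 1 << 18,	"CtrlRX" },
	{ 0,		NULL }
};

int
main(void)
{
	uint32_t host_features = 1 << 5 | 1 << 16 | 1 << 17;	/* offered */
	uint32_t guest_features = 1 << 5 | 1 << 16 | 1 << 18;	/* requested */
	/* negotiation keeps only the bits both sides understand */
	uint32_t negotiated = host_features & guest_features;
	const struct virtio_feature_name *p;

	printf("features 0x%08x:", (unsigned)negotiated);
	for (p = names; p->bit != 0; p++)
		if (negotiated & p->bit)
			printf(" %s", p->name);
	printf("\n");	/* -> features 0x00010020: MAC Status */
	return 0;
}

In the drivers below the negotiated mask gates optional functionality, e.g. vio_attach() only allocates the control queue when VIRTIO_NET_F_CTRL_VQ and VIRTIO_NET_F_CTRL_RX both survived negotiation.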
diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile
index 68ec9649bda..4686be35ad6 100644
--- a/share/man/man4/Makefile
+++ b/share/man/man4/Makefile
@@ -1,4 +1,4 @@
-# $OpenBSD: Makefile,v 1.537 2012/09/19 18:30:03 yasuoka Exp $
+# $OpenBSD: Makefile,v 1.538 2012/09/19 19:24:33 sf Exp $
 
 MAN=	aac.4 ac97.4 acphy.4 \
 	acpi.4 acpiac.4 acpiasus.4 acpibat.4 acpibtn.4 acpicpu.4 acpidock.4 \
@@ -65,7 +65,8 @@ MAN=	aac.4 ac97.4 acphy.4 \
 	uthum.4 uticom.4 utwitch.4 utrh.4 uts.4 uvideo.4 uvisor.4 uvscom.4 \
 	uyap.4 \
 	vether.4 vga.4 vgafb.4 vge.4 \
-	viapm.4 viasio.4 vic.4 video.4 vlan.4 vmt.4 vnd.4 vr.4 \
+	viapm.4 viasio.4 vic.4 video.4 vio.4 vioblk.4 virtio.4 vlan.4 \
+	vmt.4 vnd.4 vr.4 \
 	vscsi.4 vte.4 \
 	watchdog.4 wb.4 wbenv.4 wbng.4 wbsd.4 wbsio.4 wd.4 wdc.4 we.4 \
 	wi.4 wpi.4 wscons.4 wsdisplay.4 wskbd.4 wsmouse.4 wsmux.4 \
diff --git a/share/man/man4/vio.4 b/share/man/man4/vio.4
new file mode 100644
index 00000000000..39656119a51
--- /dev/null
+++ b/share/man/man4/vio.4
@@ -0,0 +1,50 @@
+.\" $OpenBSD: vio.4,v 1.1 2012/09/19 19:24:33 sf Exp $
+.\"
+.\" Copyright (c) 2012 Stefan Fritsch <sf@sfritsch.de>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: September 19 2012 $
+.Dt VIO 4
+.Os
+.Sh NAME
+.Nm vio
+.Nd virtual network device
+.Sh SYNOPSIS
+.Cd "vio* at virtio?"
+.Sh DESCRIPTION
+The
+.Nm
+driver provides support for the virtio network interface provided by
+hypervisors such as KVM or VirtualBox.
+.Sh SEE ALSO
+.Xr intro 4 ,
+.Xr virtio 4
+.Sh HISTORY
+The
+.Nm
+driver first appeared in
+.Ox 5.3 .
+.Sh AUTHORS
+The
+.Nm
+driver was ported to OpenBSD and improved by
+.An Stefan Fritsch Aq sf@sfritsch.de .
+It is based on the NetBSD
+.Nm vioif
+driver by
+.An Minoura Makoto .
+.Sh BUGS
+.Nm
+currently does not support jumbo frames.
diff --git a/share/man/man4/vioblk.4 b/share/man/man4/vioblk.4
new file mode 100644
index 00000000000..2ded09861f1
--- /dev/null
+++ b/share/man/man4/vioblk.4
@@ -0,0 +1,49 @@
+.\" $OpenBSD: vioblk.4,v 1.1 2012/09/19 19:24:33 sf Exp $
+.\"
+.\" Copyright (c) 2012 Stefan Fritsch <sf@sfritsch.de>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\" +.Dd $Mdocdate: September 19 2012 $ +.Dt VIOBLK 4 +.Os +.Sh NAME +.Nm vioblk +.Nd virtual disk +.Sh SYNOPSIS +.Cd "vioblk* at virtio?" +.Cd "scsibus* at vioblk?" +.Sh DESCRIPTION +The +.Nm +driver provides a virtual disk using a virtio block device provided by a +hypervisor like kvm or virtualbox. +.Pp +.Sh SEE ALSO +.Xr virtio 4 , +.Xr intro 4 , +.Xr scsi 4 +.Sh HISTORY +The +.Nm +driver first appeared in +.Ox 5.3 . +.Sh AUTHORS +The +.Nm +driver was written by +.An Stefan Fritsch Aq sf@sfritsch.de , +based on the +.Xr vdsk 4 +driver by +.An Mark Kettenis Aq kettenis@openbsd.org . diff --git a/share/man/man4/virtio.4 b/share/man/man4/virtio.4 new file mode 100644 index 00000000000..dba8cc6e00a --- /dev/null +++ b/share/man/man4/virtio.4 @@ -0,0 +1,49 @@ +.\" $OpenBSD: virtio.4,v 1.1 2012/09/19 19:24:33 sf Exp $ +.\" +.\" Copyright (c) 2012 Stefan Fritsch <sf@sfritsch.de> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: September 19 2012 $ +.Dt VIRTIO 4 +.Os +.Sh NAME +.Nm virtio +.Nd virtio support driver +.Sh SYNOPSIS +.Cd "virtio* at pci?" +.Sh DESCRIPTION +The +.Nm +driver provides support service for paravirtualized devices like the virtio +network adapter +.Xr vio 4 +or the virtio block device. +Such a devices are provided by some hypervisors like kvm or virtualbox. +.Pp +.Sh SEE ALSO +.Xr vioblk 4 , +.Xr vio 4 , +.Xr intro 4 , +.Sh HISTORY +The +.Nm +driver first appeared in +.Ox 5.3 . +.Sh AUTHORS +The +.Nm +driver was ported to OpenBSD and improved by +.An Stefan Fritsch Aq sf@sfritsch.de . +It is based on the NetBSD driver by +.An Minoura Makoto . diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC index ede6f40d7bf..cc11eb39059 100644 --- a/sys/arch/amd64/conf/GENERIC +++ b/sys/arch/amd64/conf/GENERIC @@ -1,4 +1,4 @@ -# $OpenBSD: GENERIC,v 1.331 2012/08/14 00:54:12 dlg Exp $ +# $OpenBSD: GENERIC,v 1.332 2012/09/19 19:24:33 sf Exp $ # # For further information on compiling OpenBSD kernels, see the config(8) # man page. @@ -599,3 +599,8 @@ pseudo-device mtrr 1 # Memory range attributes control # mouse & keyboard multiplexor pseudo-devices pseudo-device wsmux 2 pseudo-device crypto 1 + +# Virtio devices +virtio* at pci? # Virtio PCI device +vioblk* at virtio? # Virtio block device +vio* at virtio? # Virtio network device diff --git a/sys/arch/i386/conf/GENERIC b/sys/arch/i386/conf/GENERIC index a5d87b9e99d..ad12e9f6b51 100644 --- a/sys/arch/i386/conf/GENERIC +++ b/sys/arch/i386/conf/GENERIC @@ -1,4 +1,4 @@ -# $OpenBSD: GENERIC,v 1.738 2012/08/14 00:54:12 dlg Exp $ +# $OpenBSD: GENERIC,v 1.739 2012/09/19 19:24:33 sf Exp $ # # For further information on compiling OpenBSD kernels, see the config(8) # man page. 
@@ -790,3 +790,8 @@ pseudo-device hotplug 1 # devices hot plugging # mouse & keyboard multiplexor pseudo-devices pseudo-device wsmux 2 pseudo-device crypto 1 + +# Virtio devices +virtio* at pci? # Virtio PCI device +vioblk* at virtio? # Virtio block device +vio* at virtio? # Virtio network device diff --git a/sys/dev/pci/files.pci b/sys/dev/pci/files.pci index bde71ee0ea5..c16d46dedfb 100644 --- a/sys/dev/pci/files.pci +++ b/sys/dev/pci/files.pci @@ -1,4 +1,4 @@ -# $OpenBSD: files.pci,v 1.287 2012/08/30 21:54:12 mpi Exp $ +# $OpenBSD: files.pci,v 1.288 2012/09/19 19:24:33 sf Exp $ # $NetBSD: files.pci,v 1.20 1996/09/24 17:47:15 christos Exp $ # # Config file and device description for machine-independent PCI code. @@ -823,3 +823,18 @@ file dev/pci/itherm.c itherm device glxpcib: isabus, gpiobus, i2cbus attach glxpcib at pci file dev/pci/glxpcib.c glxpcib + +# VirtIO +device virtio {} +file dev/pci/virtio.c virtio + +attach virtio at pci with virtio_pci +file dev/pci/virtio_pci.c virtio_pci + +device vio +attach vio at virtio +file dev/pci/if_vio.c vio + +device vioblk: scsi +attach vioblk at virtio +file dev/pci/vioblk.c vioblk diff --git a/sys/dev/pci/if_vio.c b/sys/dev/pci/if_vio.c new file mode 100644 index 00000000000..076b77dc3ae --- /dev/null +++ b/sys/dev/pci/if_vio.c @@ -0,0 +1,1357 @@ +/* + * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg. + * Copyright (c) 2010 Minoura Makoto. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include "bpfilter.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/device.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/timeout.h> + +#include <dev/pci/pcidevs.h> +#include <dev/pci/pcireg.h> +#include <dev/pci/pcivar.h> +#include <dev/pci/virtioreg.h> +#include <dev/pci/virtiovar.h> + +#include <net/if.h> +#include <net/if_dl.h> +#include <net/if_media.h> +#include <net/if_types.h> + +#ifdef INET +#include <netinet/in.h> +#include <netinet/if_ether.h> +#endif + +#include <net/bpf.h> + +#if VIRTIO_DEBUG +#define DBGPRINT(fmt, args...) printf("%s: " fmt "\n", __func__, ## args) +#else +#define DBGPRINT(fmt, args...) 
+#endif + +/* + * if_vioreg.h: + */ +/* Configuration registers */ +#define VIRTIO_NET_CONFIG_MAC 0 /* 8bit x 6byte */ +#define VIRTIO_NET_CONFIG_STATUS 6 /* 16bit */ + +/* Feature bits */ +#define VIRTIO_NET_F_CSUM (1<<0) +#define VIRTIO_NET_F_GUEST_CSUM (1<<1) +#define VIRTIO_NET_F_MAC (1<<5) +#define VIRTIO_NET_F_GSO (1<<6) +#define VIRTIO_NET_F_GUEST_TSO4 (1<<7) +#define VIRTIO_NET_F_GUEST_TSO6 (1<<8) +#define VIRTIO_NET_F_GUEST_ECN (1<<9) +#define VIRTIO_NET_F_GUEST_UFO (1<<10) +#define VIRTIO_NET_F_HOST_TSO4 (1<<11) +#define VIRTIO_NET_F_HOST_TSO6 (1<<12) +#define VIRTIO_NET_F_HOST_ECN (1<<13) +#define VIRTIO_NET_F_HOST_UFO (1<<14) +#define VIRTIO_NET_F_MRG_RXBUF (1<<15) +#define VIRTIO_NET_F_STATUS (1<<16) +#define VIRTIO_NET_F_CTRL_VQ (1<<17) +#define VIRTIO_NET_F_CTRL_RX (1<<18) +#define VIRTIO_NET_F_CTRL_VLAN (1<<19) +#define VIRTIO_NET_F_CTRL_RX_EXTRA (1<<20) +#define VIRTIO_NET_F_GUEST_ANNOUNCE (1<<21) + +static const struct virtio_feature_name virtio_net_feature_names[] = { + { VIRTIO_NET_F_CSUM, "CSum" }, + { VIRTIO_NET_F_MAC, "MAC" }, + { VIRTIO_NET_F_GSO, "GSO" }, + { VIRTIO_NET_F_GUEST_TSO4, "GuestTSO4" }, + { VIRTIO_NET_F_GUEST_TSO6, "GuestTSO6" }, + { VIRTIO_NET_F_GUEST_ECN, "GuestECN" }, + { VIRTIO_NET_F_GUEST_UFO, "GuestUFO" }, + { VIRTIO_NET_F_HOST_TSO4, "HostTSO4" }, + { VIRTIO_NET_F_HOST_TSO6, "HostTSO6" }, + { VIRTIO_NET_F_HOST_ECN, "HostECN" }, + { VIRTIO_NET_F_HOST_UFO, "HostUFO" }, + { VIRTIO_NET_F_MRG_RXBUF, "MrgRXBuf" }, + { VIRTIO_NET_F_STATUS, "Status" }, + { VIRTIO_NET_F_CTRL_VQ, "CtrlVQ" }, + { VIRTIO_NET_F_CTRL_RX, "CtrlRX" }, + { VIRTIO_NET_F_CTRL_VLAN, "CtrlVLAN" }, + { VIRTIO_NET_F_CTRL_RX_EXTRA, "CtrlRXExtra" }, + { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, + { 0, NULL } +}; + +/* Status */ +#define VIRTIO_NET_S_LINK_UP 1 + +/* Packet header structure */ +struct virtio_net_hdr { + uint8_t flags; + uint8_t gso_type; + uint16_t hdr_len; + uint16_t gso_size; + uint16_t csum_start; + uint16_t csum_offset; +#if 0 + uint16_t num_buffers; /* if VIRTIO_NET_F_MRG_RXBUF enabled */ +#endif +} __packed; + +#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* flags */ +#define VIRTIO_NET_HDR_GSO_NONE 0 /* gso_type */ +#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* gso_type */ +#define VIRTIO_NET_HDR_GSO_UDP 3 /* gso_type */ +#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* gso_type */ +#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* gso_type, |'ed */ + +#define VIRTIO_NET_MAX_GSO_LEN (65536+ETHER_HDR_LEN) + +/* Control virtqueue */ +struct virtio_net_ctrl_cmd { + uint8_t class; + uint8_t command; +} __packed; +#define VIRTIO_NET_CTRL_RX 0 +# define VIRTIO_NET_CTRL_RX_PROMISC 0 +# define VIRTIO_NET_CTRL_RX_ALLMULTI 1 + +#define VIRTIO_NET_CTRL_MAC 1 +# define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 + +#define VIRTIO_NET_CTRL_VLAN 2 +# define VIRTIO_NET_CTRL_VLAN_ADD 0 +# define VIRTIO_NET_CTRL_VLAN_DEL 1 + +struct virtio_net_ctrl_status { + uint8_t ack; +} __packed; +#define VIRTIO_NET_OK 0 +#define VIRTIO_NET_ERR 1 + +struct virtio_net_ctrl_rx { + uint8_t onoff; +} __packed; + +struct virtio_net_ctrl_mac_tbl { + uint32_t nentries; + uint8_t macs[][ETHER_ADDR_LEN]; +} __packed; + +struct virtio_net_ctrl_vlan { + uint16_t id; +} __packed; + +/* + * if_viovar.h: + */ +enum vio_ctrl_state { + FREE, INUSE, DONE, RESET +}; + +struct vio_softc { + struct device sc_dev; + + struct virtio_softc *sc_virtio; +#define VQRX 0 +#define VQTX 1 +#define VQCTL 2 + struct virtqueue sc_vq[3]; + + struct arpcom sc_ac; + struct ifmedia sc_media; + + short sc_ifflags; + + /* bus_dmamem */ + bus_dma_segment_t sc_dma_seg; + 
bus_dmamap_t sc_dma_map;
+	size_t sc_dma_size;
+	caddr_t sc_dma_kva;
+
+	struct virtio_net_hdr *sc_rx_hdrs;
+	struct virtio_net_hdr *sc_tx_hdrs;
+	struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
+	struct virtio_net_ctrl_status *sc_ctrl_status;
+	struct virtio_net_ctrl_rx *sc_ctrl_rx;
+	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
+#define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc
+	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;
+
+	/* kmem */
+	bus_dmamap_t *sc_arrays;
+#define sc_rx_dmamaps sc_arrays
+	bus_dmamap_t *sc_tx_dmamaps;
+	struct mbuf **sc_rx_mbufs;
+	struct mbuf **sc_tx_mbufs;
+
+	enum vio_ctrl_state sc_ctrl_inuse;
+
+	struct timeout sc_tick;
+};
+
+#define VIO_DMAMEM_OFFSET(sc, p) ((caddr_t)(p) - (sc)->sc_dma_kva)
+#define VIO_DMAMEM_SYNC(vsc, sc, p, size, flags) \
+	bus_dmamap_sync((vsc)->sc_dmat, (sc)->sc_dma_map, \
+	    VIO_DMAMEM_OFFSET((sc), (p)), (size), (flags))
+#define VIO_DMAMEM_ENQUEUE(sc, vq, slot, p, size, write) \
+	virtio_enqueue_p((vq), (slot), (sc)->sc_dma_map, \
+	    VIO_DMAMEM_OFFSET((sc), (p)), (size), (write))
+
+#define VIRTIO_NET_TX_MAXNSEGS		16 /* for larger chains, defrag */
+#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	64 /* for more entries, use ALLMULTI */
+
+/* for now, sc_ctrl_mac_tbl_uc always has 0 entries */
+#define VIO_CTRL_MAC_INFO_SIZE \
+	(2*sizeof(struct virtio_net_ctrl_mac_tbl) + \
+	 0 + VIRTIO_NET_CTRL_MAC_MAXENTRIES * ETHER_ADDR_LEN)
+
+/* cfattach interface functions */
+int	vio_match(struct device *, void *, void *);
+void	vio_attach(struct device *, struct device *, void *);
+
+/* ifnet interface functions */
+int	vio_init(struct ifnet *);
+void	vio_stop(struct ifnet *, int);
+void	vio_start(struct ifnet *);
+int	vio_ioctl(struct ifnet *, u_long, caddr_t);
+void	vio_get_lladr(struct arpcom *ac, struct virtio_softc *vsc);
+void	vio_put_lladr(struct arpcom *ac, struct virtio_softc *vsc);
+
+/* rx */
+int	vio_add_rx_mbuf(struct vio_softc *, int);
+void	vio_free_rx_mbuf(struct vio_softc *, int);
+void	vio_populate_rx_mbufs(struct vio_softc *);
+int	vio_rxeof(struct vio_softc *);
+int	vio_rx_intr(struct virtqueue *);
+void	vio_rx_drain(struct vio_softc *);
+
+/* tx */
+int	vio_tx_intr(struct virtqueue *);
+int	vio_txeof(struct virtqueue *);
+void	vio_tx_drain(struct vio_softc *);
+int	vio_encap(struct vio_softc *, int, struct mbuf *, struct mbuf **);
+void	vio_txtick(void *);
+
+/* other control */
+int	vio_link_state(struct ifnet *);
+int	vio_config_change(struct virtio_softc *);
+int	vio_ctrl_rx(struct vio_softc *, int, int);
+int	vio_set_rx_filter(struct vio_softc *);
+int	vio_iff(struct vio_softc *);
+int	vio_media_change(struct ifnet *);
+void	vio_media_status(struct ifnet *, struct ifmediareq *);
+int	vio_ctrleof(struct virtqueue *);
+void	vio_wait_ctrl(struct vio_softc *sc);
+int	vio_wait_ctrl_done(struct vio_softc *sc);
+void	vio_ctrl_wakeup(struct vio_softc *, enum vio_ctrl_state);
+int	vio_alloc_mem(struct vio_softc *);
+int	vio_alloc_dmamem(struct vio_softc *);
+void	vio_free_dmamem(struct vio_softc *);
+
+
+int
+vio_match(struct device *parent, void *match, void *aux)
+{
+	struct virtio_softc *va = aux;
+
+	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
+		return 1;
+
+	return 0;
+}
+
+struct cfattach vio_ca = {
+	sizeof(struct vio_softc), vio_match, vio_attach, NULL
+};
+
+struct cfdriver vio_cd = {
+	NULL, "vio", DV_IFNET
+};
+
+int
+vio_alloc_dmamem(struct vio_softc *sc)
+{
+	struct virtio_softc *vsc = sc->sc_virtio;
+	int nsegs;
+
+	if (bus_dmamap_create(vsc->sc_dmat, sc->sc_dma_size, 1,
+	    sc->sc_dma_size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW,
+	    &sc->sc_dma_map) != 0)
+		goto err;
+	if (bus_dmamem_alloc(vsc->sc_dmat, sc->sc_dma_size, 16, 0,
+	    &sc->sc_dma_seg, 1, &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO) != 0)
+		goto destroy;
+	if (bus_dmamem_map(vsc->sc_dmat, &sc->sc_dma_seg, nsegs,
+	    sc->sc_dma_size, &sc->sc_dma_kva, BUS_DMA_NOWAIT) != 0)
+		goto free;
+	if (bus_dmamap_load(vsc->sc_dmat, sc->sc_dma_map, sc->sc_dma_kva,
+	    sc->sc_dma_size, NULL, BUS_DMA_NOWAIT) != 0)
+		goto unmap;
+	return (0);
+
+unmap:
+	bus_dmamem_unmap(vsc->sc_dmat, sc->sc_dma_kva, sc->sc_dma_size);
+free:
+	bus_dmamem_free(vsc->sc_dmat, &sc->sc_dma_seg, 1);
+destroy:
+	bus_dmamap_destroy(vsc->sc_dmat, sc->sc_dma_map);
+err:
+	return (1);
+}
+
+void
+vio_free_dmamem(struct vio_softc *sc)
+{
+	struct virtio_softc *vsc = sc->sc_virtio;
+	bus_dmamap_unload(vsc->sc_dmat, sc->sc_dma_map);
+	bus_dmamem_unmap(vsc->sc_dmat, sc->sc_dma_kva, sc->sc_dma_size);
+	bus_dmamem_free(vsc->sc_dmat, &sc->sc_dma_seg, 1);
+	bus_dmamap_destroy(vsc->sc_dmat, sc->sc_dma_map);
+}
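vio_alloc_mem() below places all host-visible metadata in the single dma slab allocated above, so one dma map covers everything the device reads or writes outside the packet mbufs. A sketch of the slab layout, distilled from the offset arithmetic in that function (order and counts as in the code; sizes not to scale):

/*
 *	sc_rx_hdrs[0..rxqsize-1]	struct virtio_net_hdr per rx slot
 *	sc_tx_hdrs[0..txqsize-1]	struct virtio_net_hdr per tx slot
 *	-- only if the control queue was negotiated (sc_nvqs == 3) --
 *	sc_ctrl_cmd			command class/code (WRITE)
 *	sc_ctrl_status			ack byte from the host (READ)
 *	sc_ctrl_rx			on/off argument (WRITE)
 *	sc_ctrl_mac_tbl_uc		table header only, 0 entries for now
 *	sc_ctrl_mac_tbl_mc		header plus up to
 *					VIRTIO_NET_CTRL_MAC_MAXENTRIES addresses
 */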
+
+/* allocate memory */
+/*
+ * dma memory is used for:
+ *   sc_rx_hdrs[slot]:	metadata array for received frames (READ)
+ *   sc_tx_hdrs[slot]:	metadata array for frames to be sent (WRITE)
+ *   sc_ctrl_cmd:	command to be sent via ctrl vq (WRITE)
+ *   sc_ctrl_status:	return value for a command via ctrl vq (READ)
+ *   sc_ctrl_rx:	parameter for a VIRTIO_NET_CTRL_RX class command (WRITE)
+ *   sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
+ *			class command (WRITE)
+ *   sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
+ *			class command (WRITE)
+ * sc_ctrl_* structures are allocated only one each; they are protected by
+ * sc_ctrl_inuse, which must only be accessed at splnet
+ */
+/*
+ * dynamically allocated memory is used for:
+ *   sc_rx_dmamaps[slot]:	bus_dmamap_t array for received payload
+ *   sc_tx_dmamaps[slot]:	bus_dmamap_t array for sent payload
+ *   sc_rx_mbufs[slot]:		mbuf pointer array for received frames
+ *   sc_tx_mbufs[slot]:		mbuf pointer array for sent frames
+ */
+int
+vio_alloc_mem(struct vio_softc *sc)
+{
+	struct virtio_softc *vsc = sc->sc_virtio;
+	int allocsize, r, i;
+	unsigned int offset = 0;
+	int rxqsize, txqsize;
+	caddr_t kva;
+
+	rxqsize = vsc->sc_vqs[0].vq_num;
+	txqsize = vsc->sc_vqs[1].vq_num;
+
+	allocsize = sizeof(struct virtio_net_hdr) * rxqsize;
+	allocsize += sizeof(struct virtio_net_hdr) * txqsize;
+	if (vsc->sc_nvqs == 3) {
+		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
+		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
+		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
+		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
+		    + sizeof(struct virtio_net_ctrl_mac_tbl)
+		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
+	}
+	sc->sc_dma_size = allocsize;
+
+	if (vio_alloc_dmamem(sc) != 0) {
+		printf("unable to allocate dma region\n");
+		return -1;
+	}
+
+	kva = sc->sc_dma_kva;
+	sc->sc_rx_hdrs = (struct virtio_net_hdr*)kva;
+	offset += sizeof(struct virtio_net_hdr) * rxqsize;
+	sc->sc_tx_hdrs = (struct virtio_net_hdr*)(kva + offset);
+	offset += sizeof(struct virtio_net_hdr) * txqsize;
+	if (vsc->sc_nvqs == 3) {
+		sc->sc_ctrl_cmd = (void*)(kva + offset);
+		offset += sizeof(*sc->sc_ctrl_cmd);
+		sc->sc_ctrl_status = (void*)(kva + offset);
+		offset += sizeof(*sc->sc_ctrl_status);
+		sc->sc_ctrl_rx = (void*)(kva + offset);
+		offset += sizeof(*sc->sc_ctrl_rx);
+		sc->sc_ctrl_mac_tbl_uc = (void*)(kva + offset);
+		offset += sizeof(*sc->sc_ctrl_mac_tbl_uc);
+		/* For now, sc_ctrl_mac_tbl_uc is followed by 0 MAC entries */
+		sc->sc_ctrl_mac_tbl_mc = (void*)(kva + offset);
+	}
+
+	allocsize = (rxqsize + txqsize) *
+	    (2 * sizeof(bus_dmamap_t) + sizeof(struct mbuf *));
+	sc->sc_arrays = malloc(allocsize, M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
+	if (sc->sc_arrays == NULL) {
+		printf("unable to allocate mem for dmamaps\n");
+		goto err_hdr;
+	}
+
+	sc->sc_tx_dmamaps = sc->sc_arrays + rxqsize;
+	sc->sc_rx_mbufs = (void*) (sc->sc_tx_dmamaps + txqsize);
+	sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize;
+
+	for (i = 0; i < rxqsize; i++) {
+		r = bus_dmamap_create(vsc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0,
+		    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]);
+		if (r != 0)
+			goto err_reqs;
+	}
+
+	for (i = 0; i < txqsize; i++) {
+		r = bus_dmamap_create(vsc->sc_dmat, ETHER_MAX_LEN,
+		    VIRTIO_NET_TX_MAXNSEGS, ETHER_MAX_LEN, 0,
+		    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,
+		    &sc->sc_tx_dmamaps[i]);
+		if (r != 0)
+			goto err_reqs;
+	}
+
+	return 0;
+
+err_reqs:
+	printf("dmamap creation failed, error %d\n", r);
+	for (i = 0; i < txqsize; i++) {
+		if (sc->sc_tx_dmamaps[i])
+			bus_dmamap_destroy(vsc->sc_dmat, sc->sc_tx_dmamaps[i]);
+	}
+	for (i = 0; i < rxqsize; i++) {
+		if (sc->sc_rx_dmamaps[i])
+			bus_dmamap_destroy(vsc->sc_dmat, sc->sc_rx_dmamaps[i]);
+	}
+	if (sc->sc_arrays) {
+		free(sc->sc_arrays, M_DEVBUF);
+		sc->sc_arrays = 0;
+	}
+err_hdr:
+	vio_free_dmamem(sc);
+	return -1;
+}
+
+void
+vio_get_lladr(struct arpcom *ac, struct virtio_softc *vsc)
+{
+	int i;
+	for (i = 0; i < ETHER_ADDR_LEN; i++) {
+		ac->ac_enaddr[i] = virtio_read_device_config_1(vsc,
+		    VIRTIO_NET_CONFIG_MAC + i);
+	}
+}
+
+void
+vio_put_lladr(struct arpcom *ac, struct virtio_softc *vsc)
+{
+	int i;
+	for (i = 0; i < ETHER_ADDR_LEN; i++) {
+		virtio_write_device_config_1(vsc, VIRTIO_NET_CONFIG_MAC + i,
+		    ac->ac_enaddr[i]);
+	}
+}
+
+void
+vio_attach(struct device *parent, struct device *self, void *aux)
+{
+	struct vio_softc *sc = (struct vio_softc *)self;
+	struct virtio_softc *vsc = (struct virtio_softc *)parent;
+	uint32_t features;
+	struct ifnet *ifp = &sc->sc_ac.ac_if;
+
+	if (vsc->sc_child != NULL) {
+		printf("child already attached for %s; something wrong...\n",
+		    parent->dv_xname);
+		return;
+	}
+
+	sc->sc_virtio = vsc;
+
+	vsc->sc_child = self;
+	vsc->sc_ipl = IPL_NET;
+	vsc->sc_vqs = &sc->sc_vq[0];
+	vsc->sc_config_change = 0;
+	vsc->sc_intrhand = virtio_vq_intr;
+
+	features = VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS |
+	    VIRTIO_NET_F_CTRL_VQ | VIRTIO_NET_F_CTRL_RX;
+	/*
+	 * VIRTIO_F_RING_EVENT_IDX can be switched off by setting bit 2 in the
+	 * driver flags, see config(8)
+	 */
+	if (!(sc->sc_dev.dv_cfdata->cf_flags & 2) &&
+	    !(vsc->sc_dev.dv_cfdata->cf_flags & 2))
+		features |= VIRTIO_F_RING_EVENT_IDX;
+	else
+		printf("RingEventIdx disabled by UKC\n");
+
+	features = virtio_negotiate_features(vsc, features,
+	    virtio_net_feature_names);
+	if (features & VIRTIO_NET_F_MAC) {
+		vio_get_lladr(&sc->sc_ac, vsc);
+	} else {
+		ether_fakeaddr(ifp);
+		vio_put_lladr(&sc->sc_ac, vsc);
+	}
+	printf(": address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
+
+	if (virtio_alloc_vq(vsc, &sc->sc_vq[VQRX], 0,
+	    MCLBYTES + sizeof(struct virtio_net_hdr), 2, "rx") != 0) {
+		goto err;
+	}
+	vsc->sc_nvqs = 1;
+	sc->sc_vq[VQRX].vq_done = vio_rx_intr;
+	if (virtio_alloc_vq(vsc, &sc->sc_vq[VQTX], 1,
+	    (sizeof(struct virtio_net_hdr) + (ETHER_MAX_LEN - ETHER_HDR_LEN)),
+	    VIRTIO_NET_TX_MAXNSEGS + 1, "tx") != 0) {
+		goto err;
+	}
+	vsc->sc_nvqs = 2;
+	sc->sc_vq[VQTX].vq_done = vio_tx_intr;
+	virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]);
+	if (features & VIRTIO_F_RING_EVENT_IDX)
+
virtio_postpone_intr_far(&sc->sc_vq[VQTX]); + else + virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]); + if ((features & VIRTIO_NET_F_CTRL_VQ) + && (features & VIRTIO_NET_F_CTRL_RX)) { + if (virtio_alloc_vq(vsc, &sc->sc_vq[VQCTL], 2, NBPG, 1, + "control") == 0) { + sc->sc_vq[VQCTL].vq_done = vio_ctrleof; + virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]); + vsc->sc_nvqs = 3; + } + } + + if (vio_alloc_mem(sc) < 0) + goto err; + + strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ); + ifp->if_softc = sc; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_start = vio_start; + ifp->if_ioctl = vio_ioctl; + ifp->if_capabilities = 0; + IFQ_SET_MAXLEN(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1); + IFQ_SET_READY(&ifp->if_snd); + ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status); + ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); + vsc->sc_config_change = vio_config_change; + m_clsetwms(ifp, MCLBYTES, 4, sc->sc_vq[VQRX].vq_num); + timeout_set(&sc->sc_tick, vio_txtick, &sc->sc_vq[VQTX]); + + if_attach(ifp); + ether_ifattach(ifp); + + return; + +err: + if (vsc->sc_nvqs == 3) { + virtio_free_vq(vsc, &sc->sc_vq[2]); + vsc->sc_nvqs = 2; + } + if (vsc->sc_nvqs == 2) { + virtio_free_vq(vsc, &sc->sc_vq[1]); + vsc->sc_nvqs = 1; + } + if (vsc->sc_nvqs == 1) { + virtio_free_vq(vsc, &sc->sc_vq[0]); + vsc->sc_nvqs = 0; + } + vsc->sc_child = VIRTIO_CHILD_ERROR; + return; +} + +/* check link status */ +int +vio_link_state(struct ifnet *ifp) +{ + struct vio_softc *sc = ifp->if_softc; + struct virtio_softc *vsc = sc->sc_virtio; + int link_state = LINK_STATE_FULL_DUPLEX; + + if (vsc->sc_features & VIRTIO_NET_F_STATUS) { + int status = virtio_read_device_config_2(vsc, + VIRTIO_NET_CONFIG_STATUS); + if (!(status & VIRTIO_NET_S_LINK_UP)) + link_state = LINK_STATE_DOWN; + } + if (ifp->if_link_state != link_state) { + ifp->if_link_state = link_state; + if_link_state_change(ifp); + } + return 0; +} + +int +vio_config_change(struct virtio_softc *vsc) +{ + struct vio_softc *sc = (struct vio_softc *)vsc->sc_child; + vio_link_state(&sc->sc_ac.ac_if); + return 1; +} + +int +vio_media_change(struct ifnet *ifp) +{ + /* Ignore */ + return (0); +} + +void +vio_media_status(struct ifnet *ifp, struct ifmediareq *imr) +{ + imr->ifm_active = IFM_ETHER | IFM_AUTO; + imr->ifm_status = IFM_AVALID; + + vio_link_state(ifp); + if (LINK_STATE_IS_UP(ifp->if_link_state) && ifp->if_flags & IFF_UP) + imr->ifm_status |= IFM_ACTIVE|IFM_FDX; +} + +/* + * Interface functions for ifnet + */ +int +vio_init(struct ifnet *ifp) +{ + struct vio_softc *sc = ifp->if_softc; + + vio_stop(ifp, 0); + vio_populate_rx_mbufs(sc); + ifp->if_flags |= IFF_RUNNING; + ifp->if_flags &= ~IFF_OACTIVE; + vio_iff(sc); + vio_link_state(ifp); + return 0; +} + +void +vio_stop(struct ifnet *ifp, int disable) +{ + struct vio_softc *sc = ifp->if_softc; + struct virtio_softc *vsc = sc->sc_virtio; + + timeout_del(&sc->sc_tick); + ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE); + /* only way to stop I/O and DMA is resetting... 
*/
+	virtio_reset(vsc);
+	vio_rxeof(sc);
+	if (vsc->sc_nvqs >= 3)
+		vio_ctrleof(&sc->sc_vq[VQCTL]);
+	vio_tx_drain(sc);
+	if (disable)
+		vio_rx_drain(sc);
+
+	virtio_reinit_start(vsc);
+	virtio_negotiate_features(vsc, vsc->sc_features, NULL);
+	virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]);
+	virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
+	if (vsc->sc_nvqs >= 3)
+		virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
+	virtio_reinit_end(vsc);
+	if (vsc->sc_nvqs >= 3) {
+		if (sc->sc_ctrl_inuse != FREE)
+			sc->sc_ctrl_inuse = RESET;
+		wakeup(&sc->sc_ctrl_inuse);
+	}
+}
+
+void
+vio_start(struct ifnet *ifp)
+{
+	struct vio_softc *sc = ifp->if_softc;
+	struct virtio_softc *vsc = sc->sc_virtio;
+	struct virtqueue *vq = &sc->sc_vq[VQTX];
+	struct mbuf *m;
+	int queued = 0;
+
+	vio_txeof(vq);
+
+	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
+		return;
+
+again:
+	for (;;) {
+		int slot, r;
+		struct virtio_net_hdr *hdr;
+
+		IFQ_POLL(&ifp->if_snd, m);
+		if (m == NULL)
+			break;
+
+		r = virtio_enqueue_prep(vq, &slot);
+		if (r == EAGAIN) {
+			ifp->if_flags |= IFF_OACTIVE;
+			break;
+		}
+		if (r != 0)
+			panic("enqueue_prep for a tx buffer: %d", r);
+		r = vio_encap(sc, slot, m, &sc->sc_tx_mbufs[slot]);
+		if (r != 0) {
+			virtio_enqueue_abort(vq, slot);
+			ifp->if_flags |= IFF_OACTIVE;
+			break;
+		}
+		r = virtio_enqueue_reserve(vq, slot,
+		    sc->sc_tx_dmamaps[slot]->dm_nsegs + 1);
+		if (r != 0) {
+			bus_dmamap_unload(vsc->sc_dmat,
+			    sc->sc_tx_dmamaps[slot]);
+			sc->sc_tx_mbufs[slot] = NULL;
+			ifp->if_flags |= IFF_OACTIVE;
+			break;
+		}
+		IFQ_DEQUEUE(&ifp->if_snd, m);
+
+		hdr = &sc->sc_tx_hdrs[slot];
+		memset(hdr, 0, sizeof(*hdr));
+		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot], 0,
+		    sc->sc_tx_dmamaps[slot]->dm_mapsize, BUS_DMASYNC_PREWRITE);
+		VIO_DMAMEM_SYNC(vsc, sc, hdr, sizeof(*hdr),
+		    BUS_DMASYNC_PREWRITE);
+		VIO_DMAMEM_ENQUEUE(sc, vq, slot, hdr, sizeof(*hdr), 1);
+		virtio_enqueue(vq, slot, sc->sc_tx_dmamaps[slot], 1);
+		virtio_enqueue_commit(vsc, vq, slot, 0);
+		queued++;
+#if NBPFILTER > 0
+		if (ifp->if_bpf)
+			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+#endif
+	}
+	if (ifp->if_flags & IFF_OACTIVE) {
+		int r;
+		if (vsc->sc_features & VIRTIO_F_RING_EVENT_IDX)
+			r = virtio_postpone_intr_smart(&sc->sc_vq[VQTX]);
+		else
+			r = virtio_start_vq_intr(vsc, &sc->sc_vq[VQTX]);
+		if (r) {
+			vio_txeof(vq);
+			goto again;
+		}
+	}
+
+	if (queued > 0) {
+		virtio_notify(vsc, vq);
+		timeout_add_sec(&sc->sc_tick, 1);
+	}
+}
+
+int
+vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	struct vio_softc *sc = ifp->if_softc;
+	int s, r = 0;
+	struct ifaddr *ifa = (struct ifaddr *)data;
+
+	s = splnet();
+	switch (cmd) {
+	case SIOCSIFADDR:
+		ifp->if_flags |= IFF_UP;
+		if (!(ifp->if_flags & IFF_RUNNING))
+			vio_init(ifp);
+#ifdef INET
+		if (ifa->ifa_addr->sa_family == AF_INET)
+			arp_ifinit(&sc->sc_ac, ifa);
+#endif
+		break;
+	case SIOCSIFFLAGS:
+		if (ifp->if_flags & IFF_UP) {
+			if (ifp->if_flags & IFF_RUNNING)
+				r = ENETRESET;
+			else
+				vio_init(ifp);
+		} else {
+			if (ifp->if_flags & IFF_RUNNING)
+				vio_stop(ifp, 1);
+		}
+		break;
+	case SIOCGIFMEDIA:
+	case SIOCSIFMEDIA:
+		r = ifmedia_ioctl(ifp, (struct ifreq *)data, &sc->sc_media,
+		    cmd);
+		break;
+	default:
+		r = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
+	}
+
+	if (r == ENETRESET) {
+		if (ifp->if_flags & IFF_RUNNING)
+			vio_iff(sc);
+		r = 0;
+	}
+	splx(s);
+	return r;
+}
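The transmit loop in vio_start() above and the receive refill in vio_populate_rx_mbufs() below drive the virtqueue API added in virtio.c through one calling convention. Condensed into a sketch (an illustration of the contract, not code from the diff):

/*
 * Sketch: enqueue one request of a header descriptor (from the shared
 * dma slab, via an offset into its map) plus the segments of a loaded
 * payload map.  'write' is nonzero for buffers the driver filled for
 * the host to read (tx), zero for buffers the host will fill (rx).
 */
static int
vio_enqueue_sketch(struct virtio_softc *vsc, struct virtqueue *vq,
    bus_dmamap_t slab, int hdr_off, int hdr_len, bus_dmamap_t payload,
    int write)
{
	int r, slot;

	r = virtio_enqueue_prep(vq, &slot);	/* pick a free slot */
	if (r != 0)
		return r;			/* EAGAIN: ring is full */
	/*
	 * Abandoning the slot between prep and reserve requires
	 * virtio_enqueue_abort(); a failed reserve releases it itself.
	 */
	r = virtio_enqueue_reserve(vq, slot, payload->dm_nsegs + 1);
	if (r != 0)
		return r;
	virtio_enqueue_p(vq, slot, slab, hdr_off, hdr_len, write);
	virtio_enqueue(vq, slot, payload, write);
	virtio_enqueue_commit(vsc, vq, slot, 1);	/* 1 = kick host now */
	return 0;
}

Passing 0 as the last argument of virtio_enqueue_commit() defers the host notification; vio_start() uses that to batch a burst of packets behind a single virtio_notify().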
+
+/*
+ * Receive implementation
+ */
+/* allocate and initialize a mbuf for receive */
+int
+vio_add_rx_mbuf(struct vio_softc *sc, int i)
+{
+	struct mbuf *m;
+	int r;
+
+	m = MCLGETI(NULL, M_DONTWAIT, &sc->sc_ac.ac_if, MCLBYTES);
+	if (m == NULL)
+		return ENOBUFS;
+	sc->sc_rx_mbufs[i] = m;
+	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
+	r = bus_dmamap_load_mbuf(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i],
+	    m, BUS_DMA_READ|BUS_DMA_NOWAIT);
+	if (r) {
+		m_freem(m);
+		sc->sc_rx_mbufs[i] = 0;
+		return r;
+	}
+
+	return 0;
+}
+
+/* free a mbuf for receive */
+void
+vio_free_rx_mbuf(struct vio_softc *sc, int i)
+{
+	bus_dmamap_unload(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i]);
+	m_freem(sc->sc_rx_mbufs[i]);
+	sc->sc_rx_mbufs[i] = NULL;
+}
+
+/* add mbufs for all the empty receive slots */
+void
+vio_populate_rx_mbufs(struct vio_softc *sc)
+{
+	struct virtio_softc *vsc = sc->sc_virtio;
+	int i, r, ndone = 0;
+	struct virtqueue *vq = &sc->sc_vq[VQRX];
+
+	for (i = 0; i < vq->vq_num; i++) {
+		int slot;
+		struct virtio_net_hdr *hdr;
+		r = virtio_enqueue_prep(vq, &slot);
+		if (r == EAGAIN)
+			break;
+		if (r != 0)
+			panic("enqueue_prep for rx buffers: %d", r);
+		if (sc->sc_rx_mbufs[slot] == NULL) {
+			r = vio_add_rx_mbuf(sc, slot);
+			if (r != 0) {
+				virtio_enqueue_abort(vq, slot);
+				break;
+			}
+		}
+		r = virtio_enqueue_reserve(vq, slot,
+		    sc->sc_rx_dmamaps[slot]->dm_nsegs + 1);
+		if (r != 0) {
+			vio_free_rx_mbuf(sc, slot);
+			break;
+		}
+		hdr = &sc->sc_rx_hdrs[slot];
+		VIO_DMAMEM_SYNC(vsc, sc, hdr, sizeof(*hdr),
+		    BUS_DMASYNC_PREREAD);
+		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot], 0,
+		    MCLBYTES, BUS_DMASYNC_PREREAD);
+		VIO_DMAMEM_ENQUEUE(sc, vq, slot, hdr, sizeof(*hdr), 0);
+		virtio_enqueue(vq, slot, sc->sc_rx_dmamaps[slot], 0);
+		virtio_enqueue_commit(vsc, vq, slot, 0);
+		ndone++;
+	}
+	if (ndone > 0)
+		virtio_notify(vsc, vq);
+}
+
+/* dequeue received packets */
+int
+vio_rxeof(struct vio_softc *sc)
+{
+	struct virtio_softc *vsc = sc->sc_virtio;
+	struct virtqueue *vq = &sc->sc_vq[VQRX];
+	struct ifnet *ifp = &sc->sc_ac.ac_if;
+	struct mbuf *m;
+	int r = 0;
+	int slot, len;
+
+	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
+		struct virtio_net_hdr *hdr = &sc->sc_rx_hdrs[slot];
+		len -= sizeof(struct virtio_net_hdr);
+		r = 1;
+		VIO_DMAMEM_SYNC(vsc, sc, hdr, sizeof(*hdr),
+		    BUS_DMASYNC_POSTREAD);
+		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot], 0,
+		    MCLBYTES, BUS_DMASYNC_POSTREAD);
+		m = sc->sc_rx_mbufs[slot];
+		KASSERT(m != NULL);
+		bus_dmamap_unload(vsc->sc_dmat, sc->sc_rx_dmamaps[slot]);
+		sc->sc_rx_mbufs[slot] = 0;
+		virtio_dequeue_commit(vq, slot);
+		m->m_pkthdr.rcvif = ifp;
+		m->m_len = m->m_pkthdr.len = len;
+		m->m_pkthdr.csum_flags = 0;
+		ifp->if_ipackets++;
+#if NBPFILTER > 0
+		if (ifp->if_bpf)
+			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN);
+#endif
+		ether_input_mbuf(ifp, m);
+	}
+	return r;
+}
+
+int
+vio_rx_intr(struct virtqueue *vq)
+{
+	struct virtio_softc *vsc = vq->vq_owner;
+	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
+	int r, sum = 0;
+
+again:
+	r = vio_rxeof(sc);
+	sum += r;
+	if (r) {
+		vio_populate_rx_mbufs(sc);
+		/* set used event index to the next slot */
+		if (vsc->sc_features & VIRTIO_F_RING_EVENT_IDX) {
+			if (virtio_start_vq_intr(vq->vq_owner, vq))
+				goto again;
+		}
+	}
+
+	return sum;
+}
+
+/* free all the mbufs; called from if_stop(disable) */
+void
+vio_rx_drain(struct vio_softc *sc)
+{
+	struct virtqueue *vq = &sc->sc_vq[VQRX];
+	int i;
+
+	for (i = 0; i < vq->vq_num; i++) {
+		if (sc->sc_rx_mbufs[i] == NULL)
+			continue;
+		vio_free_rx_mbuf(sc, i);
+	}
+}
+
+/*
+ * Transmission implementation
+ */
+/* actual transmission is done in if_start */
+/* tx interrupt; dequeue and free mbufs */
+/*
+ * tx interrupt is
actually disabled unless the tx queue is full, i.e. + * IFF_OACTIVE is set. vio_txtick is used to make sure that mbufs + * are dequeued and freed even if no further transfer happens. + */ +int +vio_tx_intr(struct virtqueue *vq) +{ + struct virtio_softc *vsc = vq->vq_owner; + struct vio_softc *sc = (struct vio_softc *)vsc->sc_child; + struct ifnet *ifp = &sc->sc_ac.ac_if; + int r; + + r = vio_txeof(vq); + if (!IFQ_IS_EMPTY(&ifp->if_snd)) + vio_start(ifp); + return r; +} + +void +vio_txtick(void *arg) +{ + struct virtqueue *vq = arg; + int s = splnet(); + vio_tx_intr(vq); + splx(s); +} + +int +vio_txeof(struct virtqueue *vq) +{ + struct virtio_softc *vsc = vq->vq_owner; + struct vio_softc *sc = (struct vio_softc *)vsc->sc_child; + struct ifnet *ifp = &sc->sc_ac.ac_if; + struct mbuf *m; + int r = 0; + int slot, len; + + while (virtio_dequeue(vsc, vq, &slot, &len) == 0) { + struct virtio_net_hdr *hdr = &sc->sc_tx_hdrs[slot]; + r++; + VIO_DMAMEM_SYNC(vsc, sc, hdr, sizeof(*hdr), + BUS_DMASYNC_POSTWRITE); + bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot], 0, + sc->sc_tx_dmamaps[slot]->dm_mapsize, + BUS_DMASYNC_POSTWRITE); + m = sc->sc_tx_mbufs[slot]; + bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[slot]); + sc->sc_tx_mbufs[slot] = 0; + virtio_dequeue_commit(vq, slot); + ifp->if_opackets++; + m_freem(m); + } + + if (r) { + ifp->if_flags &= ~IFF_OACTIVE; + virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]); + } + if (vq->vq_used_idx == vq->vq_avail_idx) + timeout_del(&sc->sc_tick); + else if (r) + timeout_add_sec(&sc->sc_tick, 1); + return r; +} + +int +vio_encap(struct vio_softc *sc, int slot, struct mbuf *m, + struct mbuf **mnew) +{ + struct virtio_softc *vsc = sc->sc_virtio; + bus_dmamap_t dmap= sc->sc_tx_dmamaps[slot]; + struct mbuf *m0 = NULL; + int r; + + r = bus_dmamap_load_mbuf(vsc->sc_dmat, dmap, m, + BUS_DMA_WRITE|BUS_DMA_NOWAIT); + if (r == 0) { + *mnew = m; + return r; + } + if (r != EFBIG) + return r; + /* EFBIG: mbuf chain is too fragmented */ + MGETHDR(m0, M_DONTWAIT, MT_DATA); + if (m0 == NULL) + return ENOBUFS; + if (m->m_pkthdr.len > MHLEN) { + MCLGETI(m0, M_DONTWAIT, NULL, m->m_pkthdr.len); + if (!(m0->m_flags & M_EXT)) { + m_freem(m0); + return ENOBUFS; + } + } + m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t)); + m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len; + r = bus_dmamap_load_mbuf(vsc->sc_dmat, dmap, m0, + BUS_DMA_NOWAIT|BUS_DMA_WRITE); + if (r != 0) { + m_freem(m0); + printf("%s: tx dmamap load error %d\n", sc->sc_dev.dv_xname, + r); + return ENOBUFS; + } + m_freem(m); + *mnew = m0; + return 0; +} + +/* free all the mbufs already put on vq; called from if_stop(disable) */ +void +vio_tx_drain(struct vio_softc *sc) +{ + struct virtio_softc *vsc = sc->sc_virtio; + struct virtqueue *vq = &sc->sc_vq[VQTX]; + int i; + + for (i = 0; i < vq->vq_num; i++) { + if (sc->sc_tx_mbufs[i] == NULL) + continue; + bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[i]); + m_freem(sc->sc_tx_mbufs[i]); + sc->sc_tx_mbufs[i] = NULL; + } +} + +/* + * Control vq + */ +/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */ +int +vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff) +{ + struct virtio_softc *vsc = sc->sc_virtio; + struct virtqueue *vq = &sc->sc_vq[VQCTL]; + int r, slot; + + if (vsc->sc_nvqs < 3) + return ENOTSUP; + + splassert(IPL_NET); + vio_wait_ctrl(sc); + + sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_RX; + sc->sc_ctrl_cmd->command = cmd; + sc->sc_ctrl_rx->onoff = onoff; + + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd, + sizeof(*sc->sc_ctrl_cmd), 
BUS_DMASYNC_PREWRITE);
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_rx,
+	    sizeof(*sc->sc_ctrl_rx), BUS_DMASYNC_PREWRITE);
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
+	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD);
+
+	r = virtio_enqueue_prep(vq, &slot);
+	if (r != 0)
+		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
+	r = virtio_enqueue_reserve(vq, slot, 3);
+	if (r != 0)
+		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
+	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd,
+	    sizeof(*sc->sc_ctrl_cmd), 1);
+	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_rx,
+	    sizeof(*sc->sc_ctrl_rx), 1);
+	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
+	    sizeof(*sc->sc_ctrl_status), 0);
+	virtio_enqueue_commit(vsc, vq, slot, 1);
+
+	if (vio_wait_ctrl_done(sc)) {
+		r = EIO;
+		goto out;
+	}
+
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
+	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_rx,
+	    sizeof(*sc->sc_ctrl_rx), BUS_DMASYNC_POSTWRITE);
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
+	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
+
+	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
+		r = 0;
+	} else {
+		printf("%s: ctrl cmd %d failed\n", sc->sc_dev.dv_xname, cmd);
+		r = EIO;
+	}
+
+	DBGPRINT("cmd %d %d: %d", cmd, (int)onoff, r);
+out:
+	vio_ctrl_wakeup(sc, FREE);
+	return r;
+}
+
+void
+vio_wait_ctrl(struct vio_softc *sc)
+{
+	while (sc->sc_ctrl_inuse != FREE)
+		tsleep(&sc->sc_ctrl_inuse, IPL_NET, "vio_wait", 0);
+	sc->sc_ctrl_inuse = INUSE;
+}
+
+int
+vio_wait_ctrl_done(struct vio_softc *sc)
+{
+	int r = 0;
+	while (sc->sc_ctrl_inuse != DONE) {
+		if (sc->sc_ctrl_inuse == RESET) {
+			r = 1;
+			break;
+		}
+		tsleep(&sc->sc_ctrl_inuse, IPL_NET, "vio_wait", 0);
+	}
+	return r;
+}
+
+void
+vio_ctrl_wakeup(struct vio_softc *sc, enum vio_ctrl_state new)
+{
+	sc->sc_ctrl_inuse = new;
+	wakeup(&sc->sc_ctrl_inuse);
+}
+
+int
+vio_ctrleof(struct virtqueue *vq)
+{
+	struct virtio_softc *vsc = vq->vq_owner;
+	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
+	int r = 0, ret, slot;
+
+again:
+	ret = virtio_dequeue(vsc, vq, &slot, NULL);
+	if (ret == ENOENT)
+		return r;
+	virtio_dequeue_commit(vq, slot);
+	r++;
+	vio_ctrl_wakeup(sc, DONE);
+	if (virtio_start_vq_intr(vsc, vq))
+		goto again;
+
+	return r;
+}
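Taken together, vio_wait_ctrl(), vio_ctrleof(), vio_ctrl_wakeup() and vio_stop() implement a small state machine around sc_ctrl_inuse that serializes use of the single set of control buffers. Summarized as a sketch:

/*
 * States from enum vio_ctrl_state and their transitions:
 *
 *	FREE  --vio_wait_ctrl()---------->  INUSE  caller owns the buffers
 *	INUSE --vio_ctrleof() interrupt-->  DONE   host answered the command
 *	INUSE --vio_stop()-------------->   RESET  device reset, command void
 *	DONE/RESET --vio_ctrl_wakeup(FREE)-> FREE  next caller may proceed
 *
 * vio_wait_ctrl_done() sleeps until DONE or RESET and reports RESET as an
 * error, so an interrupted command fails with EIO instead of reading a
 * stale status byte.
 */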
+
+/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
+int
+vio_set_rx_filter(struct vio_softc *sc)
+{
+	/* filter already set in sc_ctrl_mac_tbl */
+	struct virtio_softc *vsc = sc->sc_virtio;
+	struct virtqueue *vq = &sc->sc_vq[VQCTL];
+	int r, slot;
+
+	splassert(IPL_NET);
+
+	if (vsc->sc_nvqs < 3)
+		return ENOTSUP;
+
+	vio_wait_ctrl(sc);
+
+	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_MAC;
+	sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_MAC_TABLE_SET;
+
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
+	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE);
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mac_info,
+	    VIO_CTRL_MAC_INFO_SIZE, BUS_DMASYNC_PREWRITE);
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
+	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD);
+
+	r = virtio_enqueue_prep(vq, &slot);
+	if (r != 0)
+		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
+	r = virtio_enqueue_reserve(vq, slot, 4);
+	if (r != 0)
+		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
+	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd,
+	    sizeof(*sc->sc_ctrl_cmd), 1);
+	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_mac_tbl_uc,
+	    sizeof(*sc->sc_ctrl_mac_tbl_uc) +
+	    sc->sc_ctrl_mac_tbl_uc->nentries * ETHER_ADDR_LEN, 1);
+	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_mac_tbl_mc,
+	    sizeof(*sc->sc_ctrl_mac_tbl_mc) +
+	    sc->sc_ctrl_mac_tbl_mc->nentries * ETHER_ADDR_LEN, 1);
+	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
+	    sizeof(*sc->sc_ctrl_status), 0);
+	virtio_enqueue_commit(vsc, vq, slot, 1);
+
+	if (vio_wait_ctrl_done(sc)) {
+		r = EIO;
+		goto out;
+	}
+
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
+	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mac_info,
+	    VIO_CTRL_MAC_INFO_SIZE, BUS_DMASYNC_POSTWRITE);
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
+	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
+
+	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
+		r = 0;
+	} else {
+		printf("%s: failed setting rx filter\n", sc->sc_dev.dv_xname);
+		r = EIO;
+	}
+
+out:
+	vio_ctrl_wakeup(sc, FREE);
+	return r;
+}
+
+/*
+ * If IFF_PROMISC is requested, set promiscuous mode.
+ * If the multicast filter is small enough (<= MAXENTRIES), set the rx filter.
+ * If a larger multicast filter exists, use ALLMULTI.
+ * If setting the rx filter fails, fall back to ALLMULTI.
+ * If ALLMULTI fails, fall back to PROMISC.
+ */
+int
+vio_iff(struct vio_softc *sc)
+{
+	struct virtio_softc *vsc = sc->sc_virtio;
+	struct ifnet *ifp = &sc->sc_ac.ac_if;
+	struct ether_multi *enm;
+	struct ether_multistep step;
+	int nentries = 0;
+	int promisc = 0, allmulti = 0, rxfilter = 0;
+	int r;
+
+	splassert(IPL_NET);
+
+	if (vsc->sc_nvqs < 3) {
+		/* no ctrl vq; always promisc */
+		ifp->if_flags |= IFF_PROMISC;
+		return 0;
+	}
+
+	if (ifp->if_flags & IFF_PROMISC) {
+		promisc = 1;
+		goto set;
+	}
+
+	ETHER_FIRST_MULTI(step, &sc->sc_ac, enm);
+	while (enm != NULL) {
+		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
+			allmulti = 1;
+			goto set;
+		}
+		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
+			allmulti = 1;
+			goto set;
+		}
+		memcpy(sc->sc_ctrl_mac_tbl_mc->macs[nentries], enm->enm_addrlo,
+		    ETHER_ADDR_LEN);
+		ETHER_NEXT_MULTI(step, enm);
+		nentries++;
+	}
+	rxfilter = 1;
+
+set:
+	if (rxfilter) {
+		sc->sc_ctrl_mac_tbl_uc->nentries = 0;
+		sc->sc_ctrl_mac_tbl_mc->nentries = nentries;
+		r = vio_set_rx_filter(sc);
+		if (r != 0) {
+			rxfilter = 0;
+			allmulti = 1; /* fallback */
+		}
+	} else {
+		/* remove rx filter */
+		sc->sc_ctrl_mac_tbl_uc->nentries = 0;
+		sc->sc_ctrl_mac_tbl_mc->nentries = 0;
+		r = vio_set_rx_filter(sc);
+		/* what to do on failure? */
+	}
+	if (allmulti) {
+		r = vio_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, 1);
+		if (r != 0) {
+			allmulti = 0;
+			promisc = 1; /* fallback */
+		}
+	} else {
+		r = vio_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, 0);
+		/* what to do on failure? */
+	}
+
+	return vio_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, promisc);
+}
diff --git a/sys/dev/pci/vioblk.c b/sys/dev/pci/vioblk.c
new file mode 100644
index 00000000000..c87278b8a7d
--- /dev/null
+++ b/sys/dev/pci/vioblk.c
@@ -0,0 +1,619 @@
+/*
+ * Copyright (c) 2012 Stefan Fritsch.
+ * Copyright (c) 2010 Minoura Makoto.
+ * Copyright (c) 1998, 2001 Manuel Bouyer.
+ * All rights reserved.
+ *
+ * This code is based in part on the NetBSD ld_virtio driver and the
+ * OpenBSD vdsk driver.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, 2011 Mark Kettenis + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/cdefs.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <machine/bus.h> + +#include <sys/device.h> +#include <sys/stat.h> +#include <sys/buf.h> +#include <dev/pci/pcivar.h> +#include <dev/pci/virtioreg.h> +#include <dev/pci/virtiovar.h> +#include <dev/pci/vioblkreg.h> + +#include <scsi/scsi_all.h> +#include <scsi/scsi_disk.h> +#include <scsi/scsiconf.h> + +#define VIOBLK_DONE -1 + +struct virtio_feature_name vioblk_feature_names[] = { + { VIRTIO_BLK_F_BARRIER, "Barrier" }, + { VIRTIO_BLK_F_SIZE_MAX, "SizeMax" }, + { VIRTIO_BLK_F_SEG_MAX, "SegMax" }, + { VIRTIO_BLK_F_GEOMETRY, "Geometry" }, + { VIRTIO_BLK_F_RO, "RO" }, + { VIRTIO_BLK_F_BLK_SIZE, "BlkSize" }, + { VIRTIO_BLK_F_SCSI, "SCSI" }, + { VIRTIO_BLK_F_FLUSH, "Flush" }, + { VIRTIO_BLK_F_TOPOLOGY, "Topology" }, + { 0, NULL } +}; + +struct virtio_blk_req { + struct virtio_blk_req_hdr vr_hdr; + uint8_t vr_status; + struct scsi_xfer *vr_xs; + int vr_len; + bus_dmamap_t vr_cmdsts; + bus_dmamap_t vr_payload; +}; + +struct vioblk_softc { + struct device sc_dev; + struct virtio_softc *sc_virtio; + + struct virtqueue sc_vq[1]; + struct virtio_blk_req *sc_reqs; + bus_dma_segment_t sc_reqs_segs[1]; + + struct scsi_adapter sc_switch; + struct scsi_link sc_link; + + int sc_notify_on_empty; + + uint32_t sc_queued; + + /* device configuration */ + uint64_t sc_capacity; + uint32_t sc_xfer_max; + uint32_t sc_seg_max; +}; + +int vioblk_match(struct device *, void *, void *); +void vioblk_attach(struct device *, struct device *, void *); +int vioblk_alloc_reqs(struct vioblk_softc *, int); +int vioblk_vq_done(struct virtqueue *); +void vioblk_vq_done1(struct vioblk_softc *, struct virtio_softc *, + struct virtqueue *, int); +void vioblk_minphys(struct buf *, struct scsi_link *); 
+ +void vioblk_scsi_cmd(struct scsi_xfer *); +int vioblk_dev_probe(struct scsi_link *); +void vioblk_dev_free(struct scsi_link *); + +void vioblk_scsi_inq(struct scsi_xfer *); +void vioblk_scsi_capacity(struct scsi_xfer *); +void vioblk_scsi_capacity16(struct scsi_xfer *); +void vioblk_scsi_done(struct scsi_xfer *, int); + +struct cfattach vioblk_ca = { + sizeof(struct vioblk_softc), + vioblk_match, + vioblk_attach, + NULL +}; + +struct cfdriver vioblk_cd = { + NULL, "vioblk", DV_DULL +}; + + +int vioblk_match(struct device *parent, void *match, void *aux) +{ + struct virtio_softc *va = aux; + if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_BLOCK) + return 1; + return 0; +} + +#if VIRTIO_DEBUG > 0 +#define DBGPRINT(fmt, args...) printf("%s: " fmt "\n", __func__, ## args) +#else +#define DBGPRINT(fmt, args...) do {} while (0) +#endif + +void +vioblk_minphys(struct buf *bp, struct scsi_link *sl) +{ + struct vioblk_softc *sc = sl->adapter_softc; + if (bp->b_bcount > sc->sc_xfer_max) + bp->b_bcount = sc->sc_xfer_max; +} + +void +vioblk_attach(struct device *parent, struct device *self, void *aux) +{ + struct vioblk_softc *sc = (struct vioblk_softc *)self; + struct virtio_softc *vsc = (struct virtio_softc *)parent; + struct scsibus_attach_args saa; + uint32_t features; + int qsize; + + vsc->sc_vqs = &sc->sc_vq[0]; + vsc->sc_nvqs = 1; + vsc->sc_config_change = 0; + if (vsc->sc_child) + panic("already attached to something else"); + vsc->sc_child = self; + vsc->sc_ipl = IPL_BIO; + vsc->sc_intrhand = virtio_vq_intr; + sc->sc_virtio = vsc; + + features = virtio_negotiate_features(vsc, + (VIRTIO_BLK_F_RO | VIRTIO_F_NOTIFY_ON_EMPTY | + VIRTIO_BLK_F_SIZE_MAX | VIRTIO_BLK_F_SEG_MAX | + VIRTIO_BLK_F_FLUSH), + vioblk_feature_names); + + + if (features & VIRTIO_BLK_F_SIZE_MAX) { + uint32_t size_max = virtio_read_device_config_4(vsc, + VIRTIO_BLK_CONFIG_SIZE_MAX); + if (size_max < NBPG) { + printf("\nMax segment size %u too low\n", size_max); + goto err; + } + } + + if (features & VIRTIO_BLK_F_SEG_MAX) { + sc->sc_seg_max = virtio_read_device_config_4(vsc, + VIRTIO_BLK_CONFIG_SEG_MAX); + sc->sc_seg_max = MIN(sc->sc_seg_max, MAXPHYS/NBPG + 2); + } else { + sc->sc_seg_max = MAXPHYS/NBPG + 2; + } + sc->sc_xfer_max = (sc->sc_seg_max - 2) * NBPG; + + sc->sc_capacity = virtio_read_device_config_8(vsc, + VIRTIO_BLK_CONFIG_CAPACITY); + + if (virtio_alloc_vq(vsc, &sc->sc_vq[0], 0, sc->sc_xfer_max, + sc->sc_seg_max, "I/O request") != 0) { + printf("\nCan't alloc virtqueue\n"); + goto err; + } + qsize = sc->sc_vq[0].vq_num; + sc->sc_vq[0].vq_done = vioblk_vq_done; + if (vioblk_alloc_reqs(sc, qsize) < 0) { + printf("\nCan't alloc reqs\n"); + goto err; + } + + if (features & VIRTIO_F_NOTIFY_ON_EMPTY) { + virtio_stop_vq_intr(vsc, &sc->sc_vq[0]); + sc->sc_notify_on_empty = 1; + } + else { + sc->sc_notify_on_empty = 0; + } + + sc->sc_queued = 0; + + sc->sc_switch.scsi_cmd = vioblk_scsi_cmd; + sc->sc_switch.scsi_minphys = vioblk_minphys; + sc->sc_switch.dev_probe = vioblk_dev_probe; + sc->sc_switch.dev_free = vioblk_dev_free; + + sc->sc_link.adapter = &sc->sc_switch; + sc->sc_link.adapter_softc = self; + sc->sc_link.adapter_buswidth = 2; + sc->sc_link.luns = 1; + sc->sc_link.adapter_target = 2; + sc->sc_link.openings = qsize; + DBGPRINT("; qsize: %d seg_max: %d", qsize, sc->sc_seg_max); + if (features & VIRTIO_BLK_F_RO) + sc->sc_link.flags |= SDEV_READONLY; + + bzero(&saa, sizeof(saa)); + saa.saa_sc_link = &sc->sc_link; + printf("\n"); + config_found(self, &saa, scsiprint); + + return; +err: + vsc->sc_child = 
VIRTIO_CHILD_ERROR; + return; +} + +int +vioblk_vq_done(struct virtqueue *vq) +{ + struct virtio_softc *vsc = vq->vq_owner; + struct vioblk_softc *sc = (struct vioblk_softc *)vsc->sc_child; + int slot; + int ret = 0; + + if (!sc->sc_notify_on_empty) + virtio_stop_vq_intr(vsc, vq); + for (;;) { + if (virtio_dequeue(vsc, vq, &slot, NULL) != 0) { + if (sc->sc_notify_on_empty) + break; + virtio_start_vq_intr(vsc, vq); + if (virtio_dequeue(vsc, vq, &slot, NULL) != 0) + break; + } + vioblk_vq_done1(sc, vsc, vq, slot); + ret = 1; + } + return ret; +} + +void +vioblk_vq_done1(struct vioblk_softc *sc, struct virtio_softc *vsc, + struct virtqueue *vq, int slot) +{ + struct virtio_blk_req *vr = &sc->sc_reqs[slot]; + struct scsi_xfer *xs = vr->vr_xs; + KASSERT(vr->vr_len != VIOBLK_DONE); + bus_dmamap_sync(vsc->sc_dmat, vr->vr_cmdsts, 0, + sizeof(struct virtio_blk_req_hdr), BUS_DMASYNC_POSTWRITE); + if (vr->vr_hdr.type != VIRTIO_BLK_T_FLUSH) { + bus_dmamap_sync(vsc->sc_dmat, vr->vr_payload, 0, vr->vr_len, + (vr->vr_hdr.type == VIRTIO_BLK_T_IN) ? + BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); + } + bus_dmamap_sync(vsc->sc_dmat, vr->vr_cmdsts, + sizeof(struct virtio_blk_req_hdr), sizeof(uint8_t), + BUS_DMASYNC_POSTREAD); + + + if (vr->vr_status != VIRTIO_BLK_S_OK) { + DBGPRINT("EIO"); + xs->error = XS_DRIVER_STUFFUP; + xs->resid = xs->datalen; + } else { + xs->error = XS_NOERROR; + xs->resid = xs->datalen - vr->vr_len; + } + scsi_done(xs); + vr->vr_len = VIOBLK_DONE; + + virtio_dequeue_commit(vq, slot); +} + +void +vioblk_scsi_cmd(struct scsi_xfer *xs) +{ + struct scsi_rw *rw; + struct scsi_rw_big *rwb; + u_int64_t lba = 0; + u_int32_t sector_count; + uint8_t operation; + int isread; + + switch (xs->cmd->opcode) { + case READ_BIG: + case READ_COMMAND: + operation = VIRTIO_BLK_T_IN; + isread = 1; + break; + case WRITE_BIG: + case WRITE_COMMAND: + operation = VIRTIO_BLK_T_OUT; + isread = 0; + break; + + case SYNCHRONIZE_CACHE: + operation = VIRTIO_BLK_T_FLUSH; + break; + + case INQUIRY: + vioblk_scsi_inq(xs); + return; + case READ_CAPACITY: + vioblk_scsi_capacity(xs); + return; + case READ_CAPACITY_16: + vioblk_scsi_capacity16(xs); + return; + + case TEST_UNIT_READY: + case START_STOP: + case PREVENT_ALLOW: + vioblk_scsi_done(xs, XS_NOERROR); + return; + + default: + printf("%s cmd 0x%02x\n", __func__, xs->cmd->opcode); + case MODE_SENSE: + case MODE_SENSE_BIG: + case REPORT_LUNS: + vioblk_scsi_done(xs, XS_DRIVER_STUFFUP); + return; + } + + if (xs->cmdlen == 6) { + rw = (struct scsi_rw *)xs->cmd; + lba = _3btol(rw->addr) & (SRW_TOPADDR << 16 | 0xffff); + sector_count = rw->length ? rw->length : 0x100; + } else { + rwb = (struct scsi_rw_big *)xs->cmd; + lba = _4btol(rwb->addr); + sector_count = _2btol(rwb->length); + } + +{ + struct vioblk_softc *sc = xs->sc_link->adapter_softc; + struct virtqueue *vq = &sc->sc_vq[0]; + struct virtio_blk_req *vr; + struct virtio_softc *vsc = sc->sc_virtio; + int len, s; + int timeout; + int slot, ret, nsegs; + + s = splbio(); + ret = virtio_enqueue_prep(vq, &slot); + if (ret) { + DBGPRINT("virtio_enqueue_prep: %d, vq_num: %d, sc_queued: %d", + ret, vq->vq_num, sc->sc_queued); + vioblk_scsi_done(xs, XS_NO_CCB); + splx(s); + return; + } + vr = &sc->sc_reqs[slot]; + if (operation != VIRTIO_BLK_T_FLUSH) { + len = MIN(xs->datalen, sector_count * VIRTIO_BLK_SECTOR_SIZE); + ret = bus_dmamap_load(vsc->sc_dmat, vr->vr_payload, + xs->data, len, NULL, + ((isread ? 
BUS_DMA_READ : BUS_DMA_WRITE) | + BUS_DMA_NOWAIT)); + if (ret) { + DBGPRINT("bus_dmamap_load: %d", ret); + goto out_enq_abort; + } + nsegs = vr->vr_payload->dm_nsegs + 2; + } else { + len = 0; + nsegs = 2; + } + ret = virtio_enqueue_reserve(vq, slot, nsegs); + if (ret) { + DBGPRINT("virtio_enqueue_reserve: %d", ret); + bus_dmamap_unload(vsc->sc_dmat, vr->vr_payload); + goto out_done; + } + vr->vr_xs = xs; + vr->vr_hdr.type = operation; + vr->vr_hdr.ioprio = 0; + vr->vr_hdr.sector = lba; + vr->vr_len = len; + + bus_dmamap_sync(vsc->sc_dmat, vr->vr_cmdsts, + 0, sizeof(struct virtio_blk_req_hdr), + BUS_DMASYNC_PREWRITE); + if (operation != VIRTIO_BLK_T_FLUSH) { + bus_dmamap_sync(vsc->sc_dmat, vr->vr_payload, 0, len, + isread ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE); + } + bus_dmamap_sync(vsc->sc_dmat, vr->vr_cmdsts, + offsetof(struct virtio_blk_req, vr_status), sizeof(uint8_t), + BUS_DMASYNC_PREREAD); + + virtio_enqueue_p(vq, slot, vr->vr_cmdsts, 0, + sizeof(struct virtio_blk_req_hdr), 1); + if (operation != VIRTIO_BLK_T_FLUSH) + virtio_enqueue(vq, slot, vr->vr_payload, !isread); + virtio_enqueue_p(vq, slot, vr->vr_cmdsts, + offsetof(struct virtio_blk_req, vr_status), sizeof(uint8_t), 0); + virtio_enqueue_commit(vsc, vq, slot, 1); + sc->sc_queued++; + + if (!ISSET(xs->flags, SCSI_POLL)) { + /* check if some xfers are done: */ + if (sc->sc_queued > 1) + vioblk_vq_done(vq); + splx(s); + return; + } + + timeout = 1000; + do { + if (vsc->sc_ops->intr(vsc) && vr->vr_len == VIOBLK_DONE) + break; + + delay(1000); + } while(--timeout > 0); + splx(s); + return; + +out_enq_abort: + virtio_enqueue_abort(vq, slot); +out_done: + vioblk_scsi_done(xs, XS_NO_CCB); + vr->vr_len = VIOBLK_DONE; + splx(s); +} +} + +void +vioblk_scsi_inq(struct scsi_xfer *xs) +{ + struct scsi_inquiry *inq = (struct scsi_inquiry *)xs->cmd; + struct scsi_inquiry_data inqd; + + if (ISSET(inq->flags, SI_EVPD)) { + vioblk_scsi_done(xs, XS_DRIVER_STUFFUP); + return; + } + + bzero(&inqd, sizeof(inqd)); + + inqd.device = T_DIRECT; + inqd.version = 0x05; /* SPC-3 */ + inqd.response_format = 2; + inqd.additional_length = 32; + inqd.flags |= SID_CmdQue; + bcopy("VirtIO ", inqd.vendor, sizeof(inqd.vendor)); + bcopy("Block Device ", inqd.product, sizeof(inqd.product)); + + bcopy(&inqd, xs->data, MIN(sizeof(inqd), xs->datalen)); + vioblk_scsi_done(xs, XS_NOERROR); +} + +void +vioblk_scsi_capacity(struct scsi_xfer *xs) +{ + struct vioblk_softc *sc = xs->sc_link->adapter_softc; + struct scsi_read_cap_data rcd; + uint64_t capacity; + + bzero(&rcd, sizeof(rcd)); + + capacity = sc->sc_capacity - 1; + if (capacity > 0xffffffff) + capacity = 0xffffffff; + + _lto4b(capacity, rcd.addr); + _lto4b(VIRTIO_BLK_SECTOR_SIZE, rcd.length); + + bcopy(&rcd, xs->data, MIN(sizeof(rcd), xs->datalen)); + vioblk_scsi_done(xs, XS_NOERROR); +} + +void +vioblk_scsi_capacity16(struct scsi_xfer *xs) +{ + struct vioblk_softc *sc = xs->sc_link->adapter_softc; + struct scsi_read_cap_data_16 rcd; + + bzero(&rcd, sizeof(rcd)); + + _lto8b(sc->sc_capacity - 1, rcd.addr); + _lto4b(VIRTIO_BLK_SECTOR_SIZE, rcd.length); + + bcopy(&rcd, xs->data, MIN(sizeof(rcd), xs->datalen)); + vioblk_scsi_done(xs, XS_NOERROR); +} + +void +vioblk_scsi_done(struct scsi_xfer *xs, int error) +{ + xs->error = error; + scsi_done(xs); +} + +int +vioblk_dev_probe(struct scsi_link *link) +{ + KASSERT(link->lun == 0); + if (link->target == 0) + return (0); + return (ENODEV); +} + +void +vioblk_dev_free(struct scsi_link *link) +{ + printf("%s\n", __func__); +} + +int +vioblk_alloc_reqs(struct 
vioblk_softc *sc, int qsize) +{ + int allocsize, r, rsegs, i; + void *vaddr; + + allocsize = sizeof(struct virtio_blk_req) * qsize; + r = bus_dmamem_alloc(sc->sc_virtio->sc_dmat, allocsize, 0, 0, + &sc->sc_reqs_segs[0], 1, &rsegs, BUS_DMA_NOWAIT); + if (r != 0) { + printf("DMA memory allocation failed, size %d, error %d\n", + allocsize, r); + goto err_none; + } + r = bus_dmamem_map(sc->sc_virtio->sc_dmat, &sc->sc_reqs_segs[0], 1, + allocsize, (caddr_t *)&vaddr, BUS_DMA_NOWAIT); + if (r != 0) { + printf("DMA memory map failed, error %d\n", r); + goto err_dmamem_alloc; + } + sc->sc_reqs = vaddr; + memset(vaddr, 0, allocsize); + for (i = 0; i < qsize; i++) { + struct virtio_blk_req *vr = &sc->sc_reqs[i]; + vr->vr_len = VIOBLK_DONE; + r = bus_dmamap_create(sc->sc_virtio->sc_dmat, + offsetof(struct virtio_blk_req, vr_xs), 1, + offsetof(struct virtio_blk_req, vr_xs), 0, + BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &vr->vr_cmdsts); + if (r != 0) { + printf("cmd dmamap creation failed, err %d\n", r); + goto err_reqs; + } + r = bus_dmamap_load(sc->sc_virtio->sc_dmat, vr->vr_cmdsts, + &vr->vr_hdr, offsetof(struct virtio_blk_req, vr_xs), NULL, + BUS_DMA_NOWAIT); + if (r != 0) { + printf("command dmamap load failed, err %d\n", r); + goto err_reqs; + } + r = bus_dmamap_create(sc->sc_virtio->sc_dmat, MAXPHYS, + sc->sc_seg_max, MAXPHYS, 0, + BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &vr->vr_payload); + if (r != 0) { + printf("payload dmamap creation failed, err %d\n", r); + goto err_reqs; + } + } + return 0; + +err_reqs: + for (i = 0; i < qsize; i++) { + struct virtio_blk_req *vr = &sc->sc_reqs[i]; + if (vr->vr_cmdsts) { + bus_dmamap_destroy(sc->sc_virtio->sc_dmat, + vr->vr_cmdsts); + vr->vr_cmdsts = 0; + } + if (vr->vr_payload) { + bus_dmamap_destroy(sc->sc_virtio->sc_dmat, + vr->vr_payload); + vr->vr_payload = 0; + } + } + bus_dmamem_unmap(sc->sc_virtio->sc_dmat, (caddr_t)sc->sc_reqs, + allocsize); +err_dmamem_alloc: + bus_dmamem_free(sc->sc_virtio->sc_dmat, &sc->sc_reqs_segs[0], 1); +err_none: + return -1; +} diff --git a/sys/dev/pci/vioblkreg.h b/sys/dev/pci/vioblkreg.h new file mode 100644 index 00000000000..81d90064895 --- /dev/null +++ b/sys/dev/pci/vioblkreg.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2012 Stefan Fritsch. + * Copyright (c) 2010 Minoura Makoto. + * Copyright (c) 1998, 2001 Manuel Bouyer. + * All rights reserved. + * + * This code is based in part on the NetBSD ld_virtio driver and the + * OpenBSD wd driver. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Configuration registers */ +#define VIRTIO_BLK_CONFIG_CAPACITY 0 /* 64bit */ +#define VIRTIO_BLK_CONFIG_SIZE_MAX 8 /* 32bit */ +#define VIRTIO_BLK_CONFIG_SEG_MAX 12 /* 32bit */ +#define VIRTIO_BLK_CONFIG_GEOMETRY_C 16 /* 16bit */ +#define VIRTIO_BLK_CONFIG_GEOMETRY_H 18 /* 8bit */ +#define VIRTIO_BLK_CONFIG_GEOMETRY_S 19 /* 8bit */ +#define VIRTIO_BLK_CONFIG_BLK_SIZE 20 /* 32bit */ + +/* Feature bits */ +#define VIRTIO_BLK_F_BARRIER (1<<0) +#define VIRTIO_BLK_F_SIZE_MAX (1<<1) +#define VIRTIO_BLK_F_SEG_MAX (1<<2) +#define VIRTIO_BLK_F_GEOMETRY (1<<4) +#define VIRTIO_BLK_F_RO (1<<5) +#define VIRTIO_BLK_F_BLK_SIZE (1<<6) +#define VIRTIO_BLK_F_SCSI (1<<7) +#define VIRTIO_BLK_F_FLUSH (1<<9) +#define VIRTIO_BLK_F_TOPOLOGY (1<<10) + +/* Command */ +#define VIRTIO_BLK_T_IN 0 +#define VIRTIO_BLK_T_OUT 1 +#define VIRTIO_BLK_T_SCSI_CMD 2 +#define VIRTIO_BLK_T_SCSI_CMD_OUT 3 +#define VIRTIO_BLK_T_FLUSH 4 +#define VIRTIO_BLK_T_FLUSH_OUT 5 +#define VIRTIO_BLK_T_GET_ID 8 /* from qemu, not in spec, yet */ +#define VIRTIO_BLK_T_BARRIER 0x80000000 + +/* Status */ +#define VIRTIO_BLK_S_OK 0 +#define VIRTIO_BLK_S_IOERR 1 + +#define VIRTIO_BLK_ID_BYTES 20 /* length of serial number */ + +/* Request header structure */ +struct virtio_blk_req_hdr { + uint32_t type; /* VIRTIO_BLK_T_* */ + uint32_t ioprio; + uint64_t sector; +} __packed; +/* 512*virtio_blk_req_hdr.sector byte payload and 1 byte status follows */ + +#define VIRTIO_BLK_SECTOR_SIZE 512 diff --git a/sys/dev/pci/virtio.c b/sys/dev/pci/virtio.c new file mode 100644 index 00000000000..d886b8c8e7c --- /dev/null +++ b/sys/dev/pci/virtio.c @@ -0,0 +1,918 @@ +/* $NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $ */ + +/* + * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg. + * Copyright (c) 2010 Minoura Makoto. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
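Each request vioblk queues is three pieces on the ring: the virtio_blk_req_hdr above (device-readable), an optional payload, and one device-writable status byte. A minimal stand-alone illustration of filling such a header for a read; blk_hdr and setup_read_hdr() are illustrative stand-ins, and the hard-coded constants mirror VIRTIO_BLK_T_IN and VIRTIO_BLK_SECTOR_SIZE:

#include <stdint.h>
#include <string.h>

/* Same layout as struct virtio_blk_req_hdr above. */
struct blk_hdr {
	uint32_t type;		/* VIRTIO_BLK_T_IN / _OUT / _FLUSH */
	uint32_t ioprio;
	uint64_t sector;	/* always in 512-byte units */
} __attribute__((packed));

/*
 * Prepare the header of a read request. The device reads this header,
 * writes the payload (for T_IN) and writes one trailing status byte,
 * which is why the driver queues a request as (up to) three descriptors.
 */
static void
setup_read_hdr(struct blk_hdr *h, uint64_t byte_offset)
{
	memset(h, 0, sizeof(*h));
	h->type = 0;			/* VIRTIO_BLK_T_IN */
	h->sector = byte_offset / 512;	/* VIRTIO_BLK_SECTOR_SIZE */
}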
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/device.h>
+#include <sys/mutex.h>
+
+#include <dev/pci/pcidevs.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include <dev/pci/virtioreg.h>
+#include <dev/pci/virtiovar.h>
+
+#define MINSEG_INDIRECT 2 /* use indirect if nsegs >= this value */
+
+#if VIRTIO_DEBUG
+#define VIRITO_ASSERT(x) KASSERT(x)
+#else
+#define VIRITO_ASSERT(x)
+#endif
+
+void virtio_init_vq(struct virtio_softc *,
+ struct virtqueue *, int);
+void vq_free_entry(struct virtqueue *, struct vq_entry *);
+void vq_free_entry_locked(struct virtqueue *, struct vq_entry *);
+struct vq_entry *vq_alloc_entry(struct virtqueue *);
+
+struct cfdriver virtio_cd = {
+ NULL, "virtio", DV_DULL
+};
+
+#define virtio_set_status(sc, s) (sc)->sc_ops->set_status(sc, s)
+#define virtio_device_reset(sc) virtio_set_status((sc), 0)
+
+static const char * const virtio_device_name[] = {
+ "Unknown (0)", /* 0 */
+ "Network", /* 1 */
+ "Block", /* 2 */
+ "Console", /* 3 */
+ "Entropy", /* 4 */
+ "Memory Balloon", /* 5 */
+ "IO Memory", /* 6 */
+ "Rpmsg", /* 7 */
+ "SCSI host", /* 8 */
+ "9P Transport", /* 9 */
+ "mac80211 wlan" /* 10 */
+};
+#define NDEVNAMES (sizeof(virtio_device_name)/sizeof(char*))
+
+static const struct virtio_feature_name transport_feature_names[] = {
+ { VIRTIO_F_NOTIFY_ON_EMPTY, "NotifyOnEmpty"},
+ { VIRTIO_F_RING_INDIRECT_DESC, "RingIndirectDesc"},
+ { VIRTIO_F_RING_EVENT_IDX, "RingEventIdx"},
+ { VIRTIO_F_BAD_FEATURE, "BadFeature"},
+ { 0, NULL}
+};
+
+const char *
+virtio_device_string(int id)
+{
+ return id < NDEVNAMES ? virtio_device_name[id] : "Unknown";
+}
+
+void
+virtio_log_features(uint32_t host, uint32_t neg,
+ const struct virtio_feature_name *guest_feature_names)
+{
+ const struct virtio_feature_name *namep;
+ int i;
+ char c;
+ uint32_t bit;
+
+ for (i = 0; i < 32; i++) {
+ if (i == 30) {
+ /*
+ * VIRTIO_F_BAD_FEATURE is only used for
+ * checking correct negotiation
+ */
+ continue;
+ }
+ bit = 1 << i;
+ if ((host&bit) == 0)
+ continue;
+ namep = (i < 24) ? guest_feature_names :
+ transport_feature_names;
+ while (namep->bit && namep->bit != bit)
+ namep++;
+ c = (neg&bit) ? '+' : '-';
+ if (namep->name)
+ printf(" %c%s", c, namep->name);
+ else
+ printf(" %cUnknown(%d)", c, i);
+ }
+}
+
+/*
+ * Reset the device.
+ */
+/*
+ * To reset the device to a known state, do the following:
+ * virtio_reset(sc); // this will stop the device activity
+ * <dequeue finished requests>; // virtio_dequeue() can still be called
+ * <revoke pending requests in the vqs if any>;
+ * virtio_reinit_start(sc); // dequeue prohibited
+ * newfeatures = virtio_negotiate_features(sc, requestedfeatures);
+ * <some other initialization>;
+ * virtio_reinit_end(sc); // device activated; enqueue allowed
+ * Once attached, feature negotiation can only be allowed after virtio_reset.
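virtio_log_features() resolves bits 0-23 through the table the child driver hands in and bits 24-31 through transport_feature_names above. A hypothetical table of the kind a block child could pass (the name strings are illustrative; the bit values correspond to the VIRTIO_BLK_F_* definitions in vioblkreg.h):

/*
 * Hypothetical per-device name table for virtio_negotiate_features();
 * bits below 24 are looked up here, bits 24-31 in
 * transport_feature_names.
 */
static const struct virtio_feature_name blk_feature_names[] = {
	{ 1<<1,	"SizeMax" },	/* VIRTIO_BLK_F_SIZE_MAX */
	{ 1<<2,	"SegMax" },	/* VIRTIO_BLK_F_SEG_MAX */
	{ 1<<5,	"RO" },		/* VIRTIO_BLK_F_RO */
	{ 1<<6,	"BlkSize" },	/* VIRTIO_BLK_F_BLK_SIZE */
	{ 1<<9,	"Flush" },	/* VIRTIO_BLK_F_FLUSH */
	{ 0,	NULL }
};

With VIRTIO_DEBUG enabled, virtio_negotiate_features(vsc, wanted, blk_feature_names) would then print each offered bit prefixed with '+' or '-' depending on whether it was accepted.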
+ */ +void +virtio_reset(struct virtio_softc *sc) +{ + virtio_device_reset(sc); +} + +void +virtio_reinit_start(struct virtio_softc *sc) +{ + int i; + + virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK); + virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER); + for (i = 0; i < sc->sc_nvqs; i++) { + int n; + struct virtqueue *vq = &sc->sc_vqs[i]; + n = virtio_read_queue_size(sc, vq->vq_index); + if (n == 0) /* vq disappeared */ + continue; + if (n != vq->vq_num) { + panic("%s: virtqueue size changed, vq index %d\n", + sc->sc_dev.dv_xname, vq->vq_index); + } + virtio_init_vq(sc, vq, 1); + virtio_write_queue_address(sc, vq->vq_index, + vq->vq_dmamap->dm_segs[0].ds_addr / VIRTIO_PAGE_SIZE); + } +} + +void +virtio_reinit_end(struct virtio_softc *sc) +{ + virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK); +} + +/* + * dmamap sync operations for a virtqueue. + */ +static inline void +vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops) +{ + /* availoffset == sizeof(vring_desc)*vq_num */ + bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset, + ops); +} + +static inline void +vq_sync_aring(struct virtio_softc *sc, struct virtqueue *vq, int ops) +{ + bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, + vq->vq_availoffset, + offsetof(struct vring_avail, ring) + + vq->vq_num * sizeof(uint16_t), + ops); +} + +static inline void +vq_sync_uring(struct virtio_softc *sc, struct virtqueue *vq, int ops) +{ + bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, + vq->vq_usedoffset, + offsetof(struct vring_used, ring) + + vq->vq_num * sizeof(struct vring_used_elem), + ops); +} + +static inline void +vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot, + int ops) +{ + int offset = vq->vq_indirectoffset + + sizeof(struct vring_desc) * vq->vq_maxnsegs * slot; + + bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, + offset, sizeof(struct vring_desc) * vq->vq_maxnsegs, + ops); +} + +/* + * Can be used as sc_intrhand. + */ +/* + * Scan vq, bus_dmamap_sync for the vqs (not for the payload), + * and calls (*vq_done)() if some entries are consumed. + */ +int +virtio_vq_intr(struct virtio_softc *sc) +{ + struct virtqueue *vq; + int i, r = 0; + + /* going backwards is better for if_vio */ + for (i = sc->sc_nvqs - 1; i >= 0; i--) { + vq = &sc->sc_vqs[i]; + if (vq->vq_queued) { + vq->vq_queued = 0; + vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE); + } + vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD); + if (vq->vq_used_idx != vq->vq_used->idx) { + if (vq->vq_done) + r |= (vq->vq_done)(vq); + } + } + + return r; +} + +/* + * Initialize vq structure. + */ +void +virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq, int reinit) +{ + int i, j; + int vq_size = vq->vq_num; + + memset(vq->vq_vaddr, 0, vq->vq_bytesize); + + /* build the indirect descriptor chain */ + if (vq->vq_indirect != NULL) { + struct vring_desc *vd; + + for (i = 0; i < vq_size; i++) { + vd = vq->vq_indirect; + vd += vq->vq_maxnsegs * i; + for (j = 0; j < vq->vq_maxnsegs-1; j++) + vd[j].next = j + 1; + } + } + + /* free slot management */ + SIMPLEQ_INIT(&vq->vq_freelist); + for (i = 0; i < vq_size; i++) { + SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, + &vq->vq_entries[i], qe_list); + vq->vq_entries[i].qe_index = i; + } + + /* enqueue/dequeue status */ + vq->vq_avail_idx = 0; + vq->vq_avail_signalled = 0xffff; + vq->vq_used_idx = 0; + vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE); + vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD); + vq->vq_queued = 1; +} + +/* + * Allocate/free a vq. 
+ */ +int +virtio_alloc_vq(struct virtio_softc *sc, + struct virtqueue *vq, int index, int maxsegsize, int maxnsegs, + const char *name) +{ + int vq_size, allocsize1, allocsize2, allocsize3, allocsize = 0; + int rsegs, r, hdrlen; +#define VIRTQUEUE_ALIGN(n) (((n)+(VIRTIO_PAGE_SIZE-1))& \ + ~(VIRTIO_PAGE_SIZE-1)) + + memset(vq, 0, sizeof(*vq)); + + vq_size = virtio_read_queue_size(sc, index); + if (vq_size == 0) { + printf("virtqueue not exist, index %d for %s\n", index, name); + goto err; + } + if (((vq_size - 1) & vq_size) != 0) + panic("vq_size not power of two: %d", vq_size); + + hdrlen = (sc->sc_features & VIRTIO_F_RING_EVENT_IDX) ? 3 : 2; + + /* allocsize1: descriptor table + avail ring + pad */ + allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size + + sizeof(uint16_t) * (hdrlen + vq_size)); + /* allocsize2: used ring + pad */ + allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * hdrlen + + sizeof(struct vring_used_elem)*vq_size); + /* allocsize3: indirect table */ + /* XXX: This is rather inefficient. In practice only a fraction of this + * XXX: memory will be used. + */ + if (sc->sc_indirect && maxnsegs >= MINSEG_INDIRECT) + allocsize3 = sizeof(struct vring_desc) * maxnsegs * vq_size; + else + allocsize3 = 0; + allocsize = allocsize1 + allocsize2 + allocsize3; + + /* alloc and map the memory */ + r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0, + &vq->vq_segs[0], 1, &rsegs, BUS_DMA_NOWAIT); + if (r != 0) { + printf("virtqueue %d for %s allocation failed, error %d\n", + index, name, r); + goto err; + } + r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], 1, allocsize, + (caddr_t*)&vq->vq_vaddr, BUS_DMA_NOWAIT); + if (r != 0) { + printf("virtqueue %d for %s map failed, error %d\n", + index, name, r); + goto err; + } + r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0, + BUS_DMA_NOWAIT, &vq->vq_dmamap); + if (r != 0) { + printf("virtqueue %d for %s dmamap creation failed, error %d\n", + index, name, r); + goto err; + } + r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap, + vq->vq_vaddr, allocsize, NULL, BUS_DMA_NOWAIT); + if (r != 0) { + printf("virtqueue %d for %s dmamap load failed, error %d\n", + index, name, r); + goto err; + } + + virtio_write_queue_address(sc, index, + vq->vq_dmamap->dm_segs[0].ds_addr / VIRTIO_PAGE_SIZE); + + /* remember addresses and offsets for later use */ + vq->vq_owner = sc; + vq->vq_num = vq_size; + vq->vq_mask = vq_size - 1; + vq->vq_index = index; + vq->vq_desc = vq->vq_vaddr; + vq->vq_availoffset = sizeof(struct vring_desc)*vq_size; + vq->vq_avail = (struct vring_avail*)(((char*)vq->vq_desc) + + vq->vq_availoffset); + vq->vq_usedoffset = allocsize1; + vq->vq_used = (struct vring_used*)(((char*)vq->vq_desc) + + vq->vq_usedoffset); + if (allocsize3 > 0) { + vq->vq_indirectoffset = allocsize1 + allocsize2; + vq->vq_indirect = (void*)(((char*)vq->vq_desc) + + vq->vq_indirectoffset); + } + vq->vq_bytesize = allocsize; + vq->vq_maxsegsize = maxsegsize; + vq->vq_maxnsegs = maxnsegs; + + /* free slot management */ + vq->vq_entries = malloc(sizeof(struct vq_entry)*vq_size, + M_DEVBUF, M_NOWAIT | M_ZERO); + if (vq->vq_entries == NULL) { + r = ENOMEM; + goto err; + } + + virtio_init_vq(sc, vq, 0); + +#if VIRTIO_DEBUG + printf("\nallocated %u byte for virtqueue %d for %s, size %d\n", + allocsize, index, name, vq_size); + if (allocsize3 > 0) + printf("using %d byte (%d entries) indirect descriptors\n", + allocsize3, maxnsegs * vq_size); +#endif + return 0; + +err: + virtio_write_queue_address(sc, index, 0); + if (vq->vq_dmamap) + 
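To see what virtio_alloc_vq() actually allocates, here is the same arithmetic redone in a small stand-alone program for one assumed configuration (queue size 256, 17 segments, EVENT_IDX negotiated so hdrlen is 3); the struct sizes match the packed vring layouts in virtioreg.h:

#include <stdint.h>
#include <stdio.h>

#define PAGE 4096				/* VIRTIO_PAGE_SIZE */
#define ALIGN_PAGE(n) (((n) + (PAGE - 1)) & ~(PAGE - 1))

struct vring_desc { uint64_t addr; uint32_t len; uint16_t flags, next; };
struct vring_used_elem { uint32_t id, len; };

/*
 * Re-do the allocsize computation of virtio_alloc_vq(): descriptor
 * table plus avail ring (page-aligned), used ring (page-aligned), and
 * the per-slot indirect descriptor tables.
 */
int
main(void)
{
	int vq_size = 256, maxnsegs = 17, hdrlen = 3;	/* EVENT_IDX on */
	int a1 = ALIGN_PAGE(sizeof(struct vring_desc) * vq_size +
	    sizeof(uint16_t) * (hdrlen + vq_size));
	int a2 = ALIGN_PAGE(sizeof(uint16_t) * hdrlen +
	    sizeof(struct vring_used_elem) * vq_size);
	int a3 = sizeof(struct vring_desc) * maxnsegs * vq_size;

	printf("desc+avail %d, used %d, indirect %d, total %d bytes\n",
	    a1, a2, a3, a1 + a2 + a3);
	return 0;
}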
bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap); + if (vq->vq_vaddr) + bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize); + if (vq->vq_segs[0].ds_addr) + bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1); + memset(vq, 0, sizeof(*vq)); + + return -1; +} + +int +virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq) +{ + struct vq_entry *qe; + int i = 0; + + /* device must be already deactivated */ + /* confirm the vq is empty */ + SIMPLEQ_FOREACH(qe, &vq->vq_freelist, qe_list) { + i++; + } + if (i != vq->vq_num) { + printf("%s: freeing non-empty vq, index %d\n", + sc->sc_dev.dv_xname, vq->vq_index); + return EBUSY; + } + + /* tell device that there's no virtqueue any longer */ + virtio_write_queue_address(sc, vq->vq_index, 0); + + free(vq->vq_entries, M_DEVBUF); + bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap); + bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap); + bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize); + bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1); + memset(vq, 0, sizeof(*vq)); + + return 0; +} + +/* + * Free descriptor management. + */ +struct vq_entry * +vq_alloc_entry(struct virtqueue *vq) +{ + struct vq_entry *qe; + + if (SIMPLEQ_EMPTY(&vq->vq_freelist)) + return NULL; + qe = SIMPLEQ_FIRST(&vq->vq_freelist); + SIMPLEQ_REMOVE_HEAD(&vq->vq_freelist, qe_list); + + return qe; +} + +void +vq_free_entry(struct virtqueue *vq, struct vq_entry *qe) +{ + SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, qe, qe_list); + return; +} + +void +vq_free_entry_locked(struct virtqueue *vq, struct vq_entry *qe) +{ + SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, qe, qe_list); +} + +/* + * Enqueue several dmamaps as a single request. + */ +/* + * Typical usage: + * <queue size> number of followings are stored in arrays + * - command blocks (in dmamem) should be pre-allocated and mapped + * - dmamaps for command blocks should be pre-allocated and loaded + * - dmamaps for payload should be pre-allocated + * r = virtio_enqueue_prep(sc, vq, &slot); // allocate a slot + * if (r) // currently 0 or EAGAIN + * return r; + * r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..); + * if (r) { + * virtio_enqueue_abort(sc, vq, slot); + * bus_dmamap_unload(dmat, dmamap_payload[slot]); + * return r; + * } + * r = virtio_enqueue_reserve(sc, vq, slot, + * dmamap_payload[slot]->dm_nsegs+1); + * // ^ +1 for command + * if (r) { // currently 0 or EAGAIN + * bus_dmamap_unload(dmat, dmamap_payload[slot]); + * return r; // do not call abort() + * } + * <setup and prepare commands> + * bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE); + * bus_dmamap_sync(dmat, dmamap_payload[slot],...); + * virtio_enqueue(sc, vq, slot, dmamap_cmd[slot], 0); + * virtio_enqueue(sc, vq, slot, dmamap_payload[slot], iswrite); + * virtio_enqueue_commit(sc, vq, slot, 1); + */ + +/* + * enqueue_prep: allocate a slot number + */ +int +virtio_enqueue_prep(struct virtqueue *vq, int *slotp) +{ + struct vq_entry *qe1; + + VIRITO_ASSERT(slotp != NULL); + + qe1 = vq_alloc_entry(vq); + if (qe1 == NULL) + return EAGAIN; + /* next slot is not allocated yet */ + qe1->qe_next = -1; + *slotp = qe1->qe_index; + + return 0; +} + +/* + * enqueue_reserve: allocate remaining slots and build the descriptor chain. + * Calls virtio_enqueue_abort() on failure. 
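Note that the usage comment above still shows the older NetBSD signatures, which took the softc as the first argument; the prep/reserve/enqueue functions defined in this file take only the virtqueue. A condensed sketch of the same pattern against the actual signatures, with hypothetical pre-loaded cmd_map/payload_map dmamaps and the bus_dmamap_sync() calls elided:

/*
 * Minimal enqueue sequence: allocate a slot, reserve descriptors for
 * command + payload, fill them, publish and notify. Sketch only;
 * error handling and syncs trimmed.
 */
int
example_submit(struct virtio_softc *vsc, struct virtqueue *vq,
    bus_dmamap_t cmd_map, bus_dmamap_t payload_map, int isread)
{
	int r, slot;

	r = virtio_enqueue_prep(vq, &slot);
	if (r != 0)
		return r;			/* EAGAIN: ring full */
	r = virtio_enqueue_reserve(vq, slot, payload_map->dm_nsegs + 1);
	if (r != 0)
		return r;	/* reserve already aborted the slot */

	virtio_enqueue(vq, slot, cmd_map, 1);		/* device reads */
	virtio_enqueue(vq, slot, payload_map, !isread);	/* dir of data */
	virtio_enqueue_commit(vsc, vq, slot, 1);	/* publish + kick */
	return 0;
}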
+ */ +int +virtio_enqueue_reserve(struct virtqueue *vq, int slot, int nsegs) +{ + int indirect; + struct vq_entry *qe1 = &vq->vq_entries[slot]; + + VIRITO_ASSERT(qe1->qe_next == -1); + VIRITO_ASSERT(1 <= nsegs && nsegs <= vq->vq_num); + + if ((vq->vq_indirect != NULL) && + (nsegs >= MINSEG_INDIRECT) && + (nsegs <= vq->vq_maxnsegs)) + indirect = 1; + else + indirect = 0; + qe1->qe_indirect = indirect; + + if (indirect) { + struct vring_desc *vd; + int i; + + vd = &vq->vq_desc[qe1->qe_index]; + vd->addr = vq->vq_dmamap->dm_segs[0].ds_addr + + vq->vq_indirectoffset; + vd->addr += sizeof(struct vring_desc) + * vq->vq_maxnsegs * qe1->qe_index; + vd->len = sizeof(struct vring_desc) * nsegs; + vd->flags = VRING_DESC_F_INDIRECT; + + vd = vq->vq_indirect; + vd += vq->vq_maxnsegs * qe1->qe_index; + qe1->qe_desc_base = vd; + + for (i = 0; i < nsegs-1; i++) { + vd[i].flags = VRING_DESC_F_NEXT; + } + vd[i].flags = 0; + qe1->qe_next = 0; + + return 0; + } else { + struct vring_desc *vd; + struct vq_entry *qe; + int i, s; + + vd = &vq->vq_desc[0]; + qe1->qe_desc_base = vd; + qe1->qe_next = qe1->qe_index; + s = slot; + for (i = 0; i < nsegs - 1; i++) { + qe = vq_alloc_entry(vq); + if (qe == NULL) { + vd[s].flags = 0; + virtio_enqueue_abort(vq, slot); + return EAGAIN; + } + vd[s].flags = VRING_DESC_F_NEXT; + vd[s].next = qe->qe_index; + s = qe->qe_index; + } + vd[s].flags = 0; + + return 0; + } +} + +/* + * enqueue: enqueue a single dmamap. + */ +int +virtio_enqueue(struct virtqueue *vq, int slot, bus_dmamap_t dmamap, int write) +{ + struct vq_entry *qe1 = &vq->vq_entries[slot]; + struct vring_desc *vd = qe1->qe_desc_base; + int i; + int s = qe1->qe_next; + + VIRITO_ASSERT(s >= 0); + VIRITO_ASSERT(dmamap->dm_nsegs > 0); + if (dmamap->dm_nsegs > vq->vq_maxnsegs) { + for (i = 0; i < dmamap->dm_nsegs; i++) { + printf(" %d (%d): %p %u \n", i, write, + dmamap->dm_segs[i].ds_addr, + dmamap->dm_segs[i].ds_len); + } + panic("dmamap->dm_nseg %d > vq->vq_maxnsegs %d\n", + dmamap->dm_nsegs, vq->vq_maxnsegs); + } + + for (i = 0; i < dmamap->dm_nsegs; i++) { + vd[s].addr = dmamap->dm_segs[i].ds_addr; + vd[s].len = dmamap->dm_segs[i].ds_len; + if (!write) + vd[s].flags |= VRING_DESC_F_WRITE; + s = vd[s].next; + } + qe1->qe_next = s; + + return 0; +} + +int +virtio_enqueue_p(struct virtqueue *vq, int slot, bus_dmamap_t dmamap, + bus_addr_t start, bus_size_t len, int write) +{ + struct vq_entry *qe1 = &vq->vq_entries[slot]; + struct vring_desc *vd = qe1->qe_desc_base; + int s = qe1->qe_next; + + VIRITO_ASSERT(s >= 0); + /* XXX todo: handle more segments */ + VIRITO_ASSERT(dmamap->dm_nsegs == 1); + VIRITO_ASSERT((dmamap->dm_segs[0].ds_len > start) && + (dmamap->dm_segs[0].ds_len >= start + len)); + + vd[s].addr = dmamap->dm_segs[0].ds_addr + start; + vd[s].len = len; + if (!write) + vd[s].flags |= VRING_DESC_F_WRITE; + qe1->qe_next = vd[s].next; + + return 0; +} + +static void +publish_avail_idx(struct virtio_softc *sc, struct virtqueue *vq) +{ + vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE); + vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD); + vq->vq_avail->idx = vq->vq_avail_idx; + vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE); + vq->vq_queued = 1; + vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD); +} + +/* + * enqueue_commit: add it to the aring. 
+ */
+int
+virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq,
+ int slot, int notifynow)
+{
+ struct vq_entry *qe1;
+
+ if (slot < 0)
+ goto notify;
+ vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
+ qe1 = &vq->vq_entries[slot];
+ if (qe1->qe_indirect)
+ vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
+ vq->vq_avail->ring[(vq->vq_avail_idx++) & vq->vq_mask] = slot;
+
+notify:
+ if (notifynow) {
+ if (vq->vq_owner->sc_features & VIRTIO_F_RING_EVENT_IDX) {
+ uint16_t o = vq->vq_avail_signalled;
+ uint16_t n = vq->vq_avail_idx;
+ uint16_t t = VQ_AVAIL_EVENT(vq) + 1;
+ publish_avail_idx(sc, vq);
+ if ((o < n && o < t && t <= n)
+ || (o > n && (o < t || t <= n))) {
+ sc->sc_ops->kick(sc, vq->vq_index);
+ vq->vq_avail_signalled = n;
+ }
+ } else {
+ publish_avail_idx(sc, vq);
+ if (!(vq->vq_used->flags & VRING_USED_F_NO_NOTIFY))
+ sc->sc_ops->kick(sc, vq->vq_index);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * enqueue_abort: rollback.
+ */
+int
+virtio_enqueue_abort(struct virtqueue *vq, int slot)
+{
+ struct vq_entry *qe = &vq->vq_entries[slot];
+ struct vring_desc *vd;
+ int s;
+
+ if (qe->qe_next < 0) {
+ vq_free_entry(vq, qe);
+ return 0;
+ }
+
+ s = slot;
+ vd = &vq->vq_desc[0];
+ while (vd[s].flags & VRING_DESC_F_NEXT) {
+ s = vd[s].next;
+ vq_free_entry_locked(vq, qe);
+ qe = &vq->vq_entries[s];
+ }
+ vq_free_entry_locked(vq, qe);
+ return 0;
+}
+
+/*
+ * Dequeue a request.
+ */
+/*
+ * dequeue: dequeue a request from uring; dmamap_sync for uring is
+ * already done in the interrupt handler.
+ */
+int
+virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
+ int *slotp, int *lenp)
+{
+ uint16_t slot, usedidx;
+ struct vq_entry *qe;
+
+ if (vq->vq_used_idx == vq->vq_used->idx)
+ return ENOENT;
+ usedidx = vq->vq_used_idx++;
+ usedidx &= vq->vq_mask;
+ slot = vq->vq_used->ring[usedidx].id;
+ qe = &vq->vq_entries[slot];
+
+ if (qe->qe_indirect)
+ vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);
+
+ if (slotp)
+ *slotp = slot;
+ if (lenp)
+ *lenp = vq->vq_used->ring[usedidx].len;
+
+ return 0;
+}
+
+/*
+ * dequeue_commit: complete dequeue; the slot is recycled for future use.
+ * If you forget to call this, the slot will be leaked.
+ */
+int
+virtio_dequeue_commit(struct virtqueue *vq, int slot)
+{
+ struct vq_entry *qe = &vq->vq_entries[slot];
+ struct vring_desc *vd = &vq->vq_desc[0];
+ int s = slot;
+
+ while (vd[s].flags & VRING_DESC_F_NEXT) {
+ s = vd[s].next;
+ vq_free_entry_locked(vq, qe);
+ qe = &vq->vq_entries[s];
+ }
+ vq_free_entry_locked(vq, qe);
+
+ return 0;
+}
+
+/*
+ * Increase the event index in order to delay interrupts.
+ * Returns 0 on success; returns 1 if the used ring has already advanced
+ * too far, and the caller must process the queue again (otherwise, no
+ * more interrupts will happen).
+ */
+int
+virtio_postpone_intr(struct virtqueue *vq, uint16_t nslots)
+{
+ uint16_t idx;
+
+ idx = vq->vq_used_idx + nslots;
+
+ /* set the new event index: avail_ring->used_event = idx */
+ VQ_USED_EVENT(vq) = idx;
+
+ vq_sync_aring(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
+ vq->vq_queued++;
+
+ if (nslots < virtio_nused(vq))
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Postpone interrupt until 3/4 of the available descriptors have been
+ * consumed.
+ */
+int
+virtio_postpone_intr_smart(struct virtqueue *vq)
+{
+ uint16_t nslots;
+
+ nslots = (uint16_t)(vq->vq_avail->idx - vq->vq_used_idx) * 3 / 4;
+
+ return virtio_postpone_intr(vq, nslots);
+}
+
+/*
+ * Postpone interrupt until all of the available descriptors have been
+ * consumed.
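The kick condition in virtio_enqueue_commit() tests whether the device's avail_event threshold falls inside the half-open window (old, new] of slots published since the last notification, written out as two range cases to cope with 16-bit wraparound. The virtio spec states the same predicate as a single modular comparison; a stand-alone program to check that the two forms agree (a brute-force checking aid, not driver code):

#include <stdint.h>
#include <stdio.h>

/* The two-range window test used in virtio_enqueue_commit(). */
static int
window_test(uint16_t o, uint16_t n, uint16_t t)
{
	return (o < n && o < t && t <= n) || (o > n && (o < t || t <= n));
}

/* The modular form in which the virtio spec states the same predicate. */
static int
modular_test(uint16_t o, uint16_t n, uint16_t t)
{
	return (uint16_t)(n - t) < (uint16_t)(n - o);
}

int
main(void)
{
	const uint16_t base[] = { 0, 1, 0x7ff0, 0xfff0, 0xffff };
	unsigned i, dn, dt, bad = 0;

	for (i = 0; i < 5; i++)
		for (dn = 0; dn < 48; dn++)
			for (dt = 0; dt < 48; dt++) {
				uint16_t o = base[i];
				uint16_t n = o + dn;	/* wraps mod 2^16 */
				uint16_t t = o + dt;

				if (window_test(o, n, t) !=
				    modular_test(o, n, t))
					bad++;
			}
	printf("%u mismatches\n", bad);	/* expect 0 */
	return 0;
}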
+ */
+int
+virtio_postpone_intr_far(struct virtqueue *vq)
+{
+ uint16_t nslots;
+
+ nslots = (uint16_t)(vq->vq_avail->idx - vq->vq_used_idx);
+
+ return virtio_postpone_intr(vq, nslots);
+}
+
+
+/*
+ * Start/stop vq interrupt. No guarantee.
+ */
+void
+virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
+{
+ if ((sc->sc_features & VIRTIO_F_RING_EVENT_IDX)) {
+ /*
+ * No way to disable the interrupt completely with
+ * RingEventIdx. Instead advance used_event by half
+ * the possible value. This won't happen soon and
+ * is far enough in the past to not trigger a spurious
+ * interrupt.
+ */
+ VQ_USED_EVENT(vq) = vq->vq_used_idx + 0x8000;
+ } else {
+ vq->vq_avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
+ }
+ vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
+ vq->vq_queued++;
+}
+
+int
+virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
+{
+ /*
+ * If the event index feature is negotiated, enabling
+ * interrupts is done through setting the latest
+ * consumed index in the used_event field
+ */
+ if (sc->sc_features & VIRTIO_F_RING_EVENT_IDX)
+ VQ_USED_EVENT(vq) = vq->vq_used_idx;
+ else
+ vq->vq_avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+
+ vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
+ vq->vq_queued++;
+
+ if (vq->vq_used_idx != vq->vq_used->idx)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Returns the number of slots in the used ring available to
+ * be supplied to the avail ring.
+ */
+int
+virtio_nused(struct virtqueue *vq)
+{
+ uint16_t n;
+
+ n = (uint16_t)(vq->vq_used->idx - vq->vq_used_idx);
+ VIRITO_ASSERT(n <= vq->vq_num);
+
+ return n;
+}
+
+#if VIRTIO_DEBUG
+void
+virtio_vq_dump(struct virtqueue *vq)
+{
+ /* Common fields */
+ printf(" + vq num: %d\n", vq->vq_num);
+ printf(" + vq mask: 0x%X\n", vq->vq_mask);
+ printf(" + vq index: %d\n", vq->vq_index);
+ printf(" + vq used idx: %d\n", vq->vq_used_idx);
+ printf(" + vq avail idx: %d\n", vq->vq_avail_idx);
+ printf(" + vq queued: %d\n", vq->vq_queued);
+ /* Avail ring fields */
+ printf(" + avail flags: 0x%X\n", vq->vq_avail->flags);
+ printf(" + avail idx: %d\n", vq->vq_avail->idx);
+ printf(" + avail event: %d\n", VQ_AVAIL_EVENT(vq));
+ /* Used ring fields */
+ printf(" + used flags: 0x%X\n", vq->vq_used->flags);
+ printf(" + used idx: %d\n", vq->vq_used->idx);
+ printf(" + used event: %d\n", VQ_USED_EVENT(vq));
+ printf(" +++++++++++++++++++++++++++\n");
+}
+#endif
diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
new file mode 100644
index 00000000000..fdfb27e76f8
--- /dev/null
+++ b/sys/dev/pci/virtio_pci.c
@@ -0,0 +1,411 @@
+/* $NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $ */
+
+/*
+ * Copyright (c) 2012 Stefan Fritsch.
+ * Copyright (c) 2010 Minoura Makoto.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
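A sketch of how a completion handler might combine the dequeue loop with these postpone helpers to batch interrupts (only meaningful when RingEventIdx was negotiated); example_vq_done() and the elided per-slot handling are hypothetical, and a real handler also relies on the ring syncs that virtio_vq_intr() performs before calling it:

/*
 * Drain the used ring, then ask for the next interrupt only after 3/4
 * of the outstanding requests have completed; rescan if the device
 * already raced past the new threshold.
 */
int
example_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	int slot, len, work = 0;

	for (;;) {
		while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
			/* ... complete the request stored in this slot ... */
			virtio_dequeue_commit(vq, slot);
			work = 1;
		}
		if (virtio_postpone_intr_smart(vq) == 0)
			break;
	}
	return work;
}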
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/device.h> +#include <sys/mutex.h> + +#include <dev/pci/pcidevs.h> +#include <dev/pci/pcireg.h> +#include <dev/pci/pcivar.h> + +#include <dev/pci/virtioreg.h> +#include <dev/pci/virtiovar.h> + +/* + * XXX: Before being used on big endian arches, the access to config registers + * XXX: needs to be reviewed/fixed. The non-device specific registers are + * XXX: PCI-endian while the device specific registers are native endian. + */ + +#define virtio_set_status(sc, s) virtio_pci_set_status(sc, s) +#define virtio_device_reset(sc) virtio_set_status((sc), 0) + +int virtio_pci_match(struct device *, void *, void *); +void virtio_pci_attach(struct device *, struct device *, void *); +int virtio_pci_detach(struct device *, int); + +void virtio_pci_kick(struct virtio_softc *, uint16_t); +uint8_t virtio_pci_read_device_config_1(struct virtio_softc *, int); +uint16_t virtio_pci_read_device_config_2(struct virtio_softc *, int); +uint32_t virtio_pci_read_device_config_4(struct virtio_softc *, int); +uint64_t virtio_pci_read_device_config_8(struct virtio_softc *, int); +void virtio_pci_write_device_config_1(struct virtio_softc *, int, uint8_t); +void virtio_pci_write_device_config_2(struct virtio_softc *, int, uint16_t); +void virtio_pci_write_device_config_4(struct virtio_softc *, int, uint32_t); +void virtio_pci_write_device_config_8(struct virtio_softc *, int, uint64_t); +uint16_t virtio_pci_read_queue_size(struct virtio_softc *, uint16_t); +void virtio_pci_write_queue_address(struct virtio_softc *, uint16_t, uint32_t); +void virtio_pci_set_status(struct virtio_softc *, int); +uint32_t virtio_pci_negotiate_features(struct virtio_softc *, uint32_t, + const struct virtio_feature_name *); +int virtio_pci_intr(void *); + +struct virtio_pci_softc { + struct virtio_softc sc_sc; + pci_chipset_tag_t sc_pc; + + bus_space_tag_t sc_iot; + bus_space_handle_t sc_ioh; + bus_size_t sc_iosize; + int sc_config_offset; +}; + +struct cfattach virtio_pci_ca = { + sizeof(struct virtio_pci_softc), + virtio_pci_match, + virtio_pci_attach, + virtio_pci_detach, + NULL +}; + +struct virtio_ops virtio_pci_ops = { + virtio_pci_kick, + virtio_pci_read_device_config_1, + virtio_pci_read_device_config_2, + virtio_pci_read_device_config_4, + virtio_pci_read_device_config_8, + virtio_pci_write_device_config_1, + virtio_pci_write_device_config_2, + virtio_pci_write_device_config_4, + virtio_pci_write_device_config_8, + virtio_pci_read_queue_size, + virtio_pci_write_queue_address, + virtio_pci_set_status, + virtio_pci_negotiate_features, + virtio_pci_intr, +}; + +uint16_t +virtio_pci_read_queue_size(struct virtio_softc *vsc, uint16_t idx) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; + bus_space_write_2(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_QUEUE_SELECT, + idx); + return bus_space_read_2(sc->sc_iot, sc->sc_ioh, + VIRTIO_CONFIG_QUEUE_SIZE); +} + +void +virtio_pci_write_queue_address(struct 
virtio_softc *vsc, uint16_t idx, uint32_t addr) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; + bus_space_write_2(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_QUEUE_SELECT, + idx); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_QUEUE_ADDRESS, + addr); +} + +void +virtio_pci_set_status(struct virtio_softc *vsc, int status) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; + int old = 0; + + if (status != 0) + old = bus_space_read_1(sc->sc_iot, sc->sc_ioh, + VIRTIO_CONFIG_DEVICE_STATUS); + bus_space_write_1(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_DEVICE_STATUS, + status|old); +} + +int +virtio_pci_match(struct device *parent, void *match, void *aux) +{ + struct pci_attach_args *pa; + + pa = (struct pci_attach_args *)aux; + if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_QUMRANET && + PCI_PRODUCT(pa->pa_id) >= 0x1000 && + PCI_PRODUCT(pa->pa_id) <= 0x103f && + PCI_REVISION(pa->pa_class) == 0) + return 1; + return 0; +} + +void +virtio_pci_attach(struct device *parent, struct device *self, void *aux) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self; + struct virtio_softc *vsc = &sc->sc_sc; + struct pci_attach_args *pa = (struct pci_attach_args *)aux; + pci_chipset_tag_t pc = pa->pa_pc; + pcitag_t tag = pa->pa_tag; + int revision; + pcireg_t id; + char const *intrstr; + pci_intr_handle_t ih; + + revision = PCI_REVISION(pa->pa_class); + if (revision != 0) { + printf("unknown revision 0x%02x; giving up\n", revision); + return; + } + + /* subsystem ID shows what I am */ + id = PCI_PRODUCT(pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG)); + printf(": Virtio %s Device", virtio_device_string(id)); + +#ifdef notyet + if (pci_get_capability(pc, tag, PCI_CAP_MSIX, NULL, NULL)) + printf(", msix capable"); +#endif + printf("\n"); + + vsc->sc_ops = &virtio_pci_ops; + sc->sc_pc = pc; + vsc->sc_dmat = pa->pa_dmat; + sc->sc_config_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI; + + if (pci_mapreg_map(pa, PCI_MAPREG_START, PCI_MAPREG_TYPE_IO, 0, + &sc->sc_iot, &sc->sc_ioh, NULL, &sc->sc_iosize, 0)) { + printf("can't map i/o space\n"); + return; + } + + virtio_device_reset(vsc); + virtio_pci_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_ACK); + virtio_pci_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER); + + /* XXX: use softc as aux... 
*/ + vsc->sc_childdevid = id; + vsc->sc_child = NULL; + config_found(self, sc, NULL); + if (vsc->sc_child == NULL) { + printf("no matching child driver; not configured\n"); + goto fail_1; + } + if (vsc->sc_child == VIRTIO_CHILD_ERROR) { + printf("virtio configuration failed\n"); + goto fail_1; + } + + if (pci_intr_map(pa, &ih)) { + printf("couldn't map interrupt\n"); + goto fail_2; + } + intrstr = pci_intr_string(pc, ih); + vsc->sc_ih = pci_intr_establish(pc, ih, vsc->sc_ipl, virtio_pci_intr, sc, vsc->sc_dev.dv_xname); + if (vsc->sc_ih == NULL) { + printf("couldn't establish interrupt"); + if (intrstr != NULL) + printf(" at %s", intrstr); + printf("\n"); + goto fail_2; + } + printf("%s: interrupting at %s\n", vsc->sc_dev.dv_xname, intrstr); + + virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK); + return; + +fail_2: + config_detach(vsc->sc_child, 0); +fail_1: + /* no pci_mapreg_unmap() or pci_intr_unmap() */ + virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED); +} + +int +virtio_pci_detach(struct device *self, int flags) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self; + struct virtio_softc *vsc = &sc->sc_sc; + int r; + + if (vsc->sc_child != 0 && vsc->sc_child != VIRTIO_CHILD_ERROR) { + r = config_detach(vsc->sc_child, flags); + if (r) + return r; + } + KASSERT(vsc->sc_child == 0 || vsc->sc_child == VIRTIO_CHILD_ERROR); + KASSERT(vsc->sc_vqs == 0); + pci_intr_disestablish(sc->sc_pc, vsc->sc_ih); + vsc->sc_ih = 0; + if (sc->sc_iosize) + bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize); + sc->sc_iosize = 0; + + return 0; +} + +/* + * Feature negotiation. + * Prints available / negotiated features if guest_feature_names != NULL and + * VIRTIO_DEBUG is 1 + */ +uint32_t +virtio_pci_negotiate_features(struct virtio_softc *vsc, uint32_t guest_features, + const struct virtio_feature_name *guest_feature_names) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; + uint32_t host, neg; + + /* + * indirect descriptors can be switched off by setting bit 1 in the + * driver flags, see config(8) + */ + if (!(vsc->sc_dev.dv_cfdata->cf_flags & 1) && + !(vsc->sc_child->dv_cfdata->cf_flags & 1)) { + guest_features |= VIRTIO_F_RING_INDIRECT_DESC; + } else { + printf("RingIndirectDesc disabled by UKC\n"); + } + host = bus_space_read_4(sc->sc_iot, sc->sc_ioh, + VIRTIO_CONFIG_DEVICE_FEATURES); + neg = host & guest_features; +#if VIRTIO_DEBUG + if (guest_feature_names) + virtio_log_features(host, neg, guest_feature_names); +#endif + bus_space_write_4(sc->sc_iot, sc->sc_ioh, + VIRTIO_CONFIG_GUEST_FEATURES, neg); + vsc->sc_features = neg; + if (neg & VIRTIO_F_RING_INDIRECT_DESC) + vsc->sc_indirect = 1; + else + vsc->sc_indirect = 0; + + return neg; +} + +/* + * Device configuration registers. 
+ */ +uint8_t +virtio_pci_read_device_config_1(struct virtio_softc *vsc, int index) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; + return bus_space_read_1(sc->sc_iot, sc->sc_ioh, + sc->sc_config_offset + index); +} + +uint16_t +virtio_pci_read_device_config_2(struct virtio_softc *vsc, int index) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; + return bus_space_read_2(sc->sc_iot, sc->sc_ioh, + sc->sc_config_offset + index); +} + +uint32_t +virtio_pci_read_device_config_4(struct virtio_softc *vsc, int index) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; + return bus_space_read_4(sc->sc_iot, sc->sc_ioh, + sc->sc_config_offset + index); +} + +uint64_t +virtio_pci_read_device_config_8(struct virtio_softc *vsc, int index) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; + uint64_t r; + + r = bus_space_read_4(sc->sc_iot, sc->sc_ioh, + sc->sc_config_offset + index + sizeof(uint32_t)); + r <<= 32; + r += bus_space_read_4(sc->sc_iot, sc->sc_ioh, + sc->sc_config_offset + index); + return r; +} + +void +virtio_pci_write_device_config_1(struct virtio_softc *vsc, + int index, uint8_t value) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; + bus_space_write_1(sc->sc_iot, sc->sc_ioh, + sc->sc_config_offset + index, value); +} + +void +virtio_pci_write_device_config_2(struct virtio_softc *vsc, + int index, uint16_t value) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; + bus_space_write_2(sc->sc_iot, sc->sc_ioh, + sc->sc_config_offset + index, value); +} + +void +virtio_pci_write_device_config_4(struct virtio_softc *vsc, + int index, uint32_t value) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; + bus_space_write_4(sc->sc_iot, sc->sc_ioh, + sc->sc_config_offset + index, value); +} + +void +virtio_pci_write_device_config_8(struct virtio_softc *vsc, + int index, uint64_t value) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; + bus_space_write_4(sc->sc_iot, sc->sc_ioh, + sc->sc_config_offset + index, + value & 0xffffffff); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, + sc->sc_config_offset + index + sizeof(uint32_t), + value >> 32); +} + +/* + * Interrupt handler. + */ +int +virtio_pci_intr(void *arg) +{ + struct virtio_pci_softc *sc = arg; + struct virtio_softc *vsc = &sc->sc_sc; + int isr, r = 0; + + /* check and ack the interrupt */ + isr = bus_space_read_1(sc->sc_iot, sc->sc_ioh, + VIRTIO_CONFIG_ISR_STATUS); + if (isr == 0) + return 0; + if ((isr & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) && + (vsc->sc_config_change != NULL)) + r = (vsc->sc_config_change)(vsc); + if (vsc->sc_intrhand != NULL) + r |= (vsc->sc_intrhand)(vsc); + + return r; +} + +void +virtio_pci_kick(struct virtio_softc *vsc, uint16_t idx) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; + bus_space_write_2(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_QUEUE_NOTIFY, + idx); +} diff --git a/sys/dev/pci/virtioreg.h b/sys/dev/pci/virtioreg.h new file mode 100644 index 00000000000..89a47ff095a --- /dev/null +++ b/sys/dev/pci/virtioreg.h @@ -0,0 +1,193 @@ +/* $NetBSD: virtioreg.h,v 1.1 2011/10/30 12:12:21 hannken Exp $ */ + +/* + * Copyright (c) 2012 Stefan Fritsch. + * Copyright (c) 2010 Minoura Makoto. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
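Note that the 64-bit config accessors above are composed of two 32-bit bus-space accesses (high word first on read), so they are not atomic against a host-side update. For the one 64-bit field vioblk reads, the capacity at attach time, this cannot race; a driver reading a field that may change at runtime could use the classic double-read loop. example_read_cfg_8_stable() is an illustrative sketch, not an existing helper:

/*
 * Hypothetical helper: re-read a 64-bit config field until two
 * consecutive reads agree, papering over the split 32-bit accesses.
 */
uint64_t
example_read_cfg_8_stable(struct virtio_softc *vsc, int off)
{
	uint64_t a, b;

	do {
		a = virtio_read_device_config_8(vsc, off);
		b = virtio_read_device_config_8(vsc, off);
	} while (a != b);

	return a;
}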
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Part of the file derived from `Virtio PCI Card Specification v0.8.6 DRAFT' + * Appendix A. + */ +/* An interface for efficient virtio implementation. + * + * This header is BSD licensed so anyone can use the definitions + * to implement compatible drivers/servers. + * + * Copyright 2007, 2009, IBM Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + + +#ifndef _DEV_PCI_VIRTIOREG_H_ +#define _DEV_PCI_VIRTIOREG_H_ + +#include <sys/types.h> + +/* Virtio product id (subsystem) */ +#define PCI_PRODUCT_VIRTIO_NETWORK 1 +#define PCI_PRODUCT_VIRTIO_BLOCK 2 +#define PCI_PRODUCT_VIRTIO_CONSOLE 3 +#define PCI_PRODUCT_VIRTIO_ENTROPY 4 +#define PCI_PRODUCT_VIRTIO_BALLOON 5 +#define PCI_PRODUCT_VIRTIO_IOMEM 6 +#define PCI_PRODUCT_VIRTIO_RPMSG 7 +#define PCI_PRODUCT_VIRTIO_SCSI 8 +#define PCI_PRODUCT_VIRTIO_9P 9 +#define PCI_PRODUCT_VIRTIO_MAC80211 10 + +/* Virtio header */ +#define VIRTIO_CONFIG_DEVICE_FEATURES 0 /* 32bit */ +#define VIRTIO_CONFIG_GUEST_FEATURES 4 /* 32bit */ +#define VIRTIO_F_NOTIFY_ON_EMPTY (1<<24) +#define VIRTIO_F_RING_INDIRECT_DESC (1<<28) +#define VIRTIO_F_RING_EVENT_IDX (1<<29) +#define VIRTIO_F_BAD_FEATURE (1<<30) +#define VIRTIO_CONFIG_QUEUE_ADDRESS 8 /* 32bit */ +#define VIRTIO_CONFIG_QUEUE_SIZE 12 /* 16bit */ +#define VIRTIO_CONFIG_QUEUE_SELECT 14 /* 16bit */ +#define VIRTIO_CONFIG_QUEUE_NOTIFY 16 /* 16bit */ +#define VIRTIO_CONFIG_DEVICE_STATUS 18 /* 8bit */ +#define VIRTIO_CONFIG_DEVICE_STATUS_RESET 0 +#define VIRTIO_CONFIG_DEVICE_STATUS_ACK 1 +#define VIRTIO_CONFIG_DEVICE_STATUS_DRIVER 2 +#define VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK 4 +#define VIRTIO_CONFIG_DEVICE_STATUS_FAILED 128 +#define VIRTIO_CONFIG_ISR_STATUS 19 /* 8bit */ +#define VIRTIO_CONFIG_ISR_CONFIG_CHANGE 2 +#define VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI 20 +/* Only if MSIX is enabled: */ +#define VIRTIO_MSI_CONFIG_VECTOR 20 /* 16bit, optional */ +#define VIRTIO_MSI_QUEUE_VECTOR 22 /* 16bit, optional */ +#define VIRTIO_CONFIG_DEVICE_CONFIG_MSI 24 + +/* Virtqueue */ +/* This marks a buffer as continuing via the next field. */ +#define VRING_DESC_F_NEXT 1 +/* This marks a buffer as write-only (otherwise read-only). */ +#define VRING_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors. */ +#define VRING_DESC_F_INDIRECT 4 + +/* The Host uses this in used->flags to advise the Guest: don't kick me + * when you add a buffer. It's unreliable, so it's simply an + * optimization. Guest will still kick if it's out of buffers. */ +#define VRING_USED_F_NO_NOTIFY 1 +/* The Guest uses this in avail->flags to advise the Host: don't + * interrupt me when you consume a buffer. It's unreliable, so it's + * simply an optimization. */ +#define VRING_AVAIL_F_NO_INTERRUPT 1 + + +/* The standard layout for the ring is a continuous chunk of memory which + * looks like this. We assume num is a power of 2. + * + * struct vring { + * // The actual descriptors (16 bytes each) + * struct vring_desc desc[num]; + * + * // A ring of available descriptor heads with free-running index. + * __u16 avail_flags; + * __u16 avail_idx; + * __u16 available[num]; + * __u16 used_event_idx + * + * // Padding to the next align boundary. + * char pad[]; + * + * // A ring of used descriptor heads with free-running index. + * __u16 used_flags; + * __u16 used_idx; + * struct vring_used_elem used[num]; + * __u16 avail_event_idx; + * }; + * Note: for virtio PCI, align is 4096. + */ + +/* Virtio ring descriptors: 16 bytes. + * These can chain together via "next". */ +struct vring_desc { + /* Address (guest-physical). */ + uint64_t addr; + /* Length. */ + uint32_t len; + /* The flags as indicated above. */ + uint16_t flags; + /* We chain unused descriptors via this, too */ + uint16_t next; +} __packed; + +struct vring_avail { + uint16_t flags; + uint16_t idx; + uint16_t ring[0]; +} __packed; + +/* u32 is used here for ids for padding reasons. 
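The two event-index words used with VIRTIO_F_RING_EVENT_IDX get no registers of their own; they reuse the space directly after each ring, which is what the VQ_USED_EVENT()/VQ_AVAIL_EVENT() macros defined a little further down compute. A stand-alone byte-offset check of that layout for an assumed queue size of 8:

#include <stdint.h>
#include <stdio.h>

/*
 * used_event is the uint16_t right after avail->ring[num];
 * avail_event is the uint16_t right after used->ring[num].
 */
int
main(void)
{
	unsigned num = 8;	/* assumed queue size */
	size_t avail_bytes = 2 /* flags */ + 2 /* idx */ + 2 * num;
	size_t used_bytes = 2 /* flags */ + 2 /* idx */ + 8 * num;

	printf("used_event at avail+%zu, avail_event at used+%zu\n",
	    avail_bytes, used_bytes);
	return 0;
}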
*/ +struct vring_used_elem { + /* Index of start of used descriptor chain. */ + uint32_t id; + /* Total length of the descriptor chain which was written to. */ + uint32_t len; +} __packed; + +struct vring_used { + uint16_t flags; + uint16_t idx; + struct vring_used_elem ring[0]; +} __packed; + +/* + * We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. + */ +#define VQ_USED_EVENT(vq) (*(uint16_t*)(&(vq)->vq_avail->ring[(vq)->vq_num])) +#define VQ_AVAIL_EVENT(vq) (*(uint16_t*)(&(vq)->vq_used->ring[(vq)->vq_num])) + +#define VIRTIO_PAGE_SIZE (4096) + +#endif /* _DEV_PCI_VIRTIOREG_H_ */ diff --git a/sys/dev/pci/virtiovar.h b/sys/dev/pci/virtiovar.h new file mode 100644 index 00000000000..1dc8db5e31e --- /dev/null +++ b/sys/dev/pci/virtiovar.h @@ -0,0 +1,229 @@ +/* $NetBSD: virtiovar.h,v 1.1 2011/10/30 12:12:21 hannken Exp $ */ + +/* + * Copyright (c) 2012 Stefan Fritsch. + * Copyright (c) 2010 Minoura Makoto. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Part of the file derived from `Virtio PCI Card Specification v0.8.6 DRAFT' + * Appendix A. + */ +/* An interface for efficient virtio implementation. + * + * This header is BSD licensed so anyone can use the definitions + * to implement compatible drivers/servers. + * + * Copyright 2007, 2009, IBM Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + +#ifndef _DEV_PCI_VIRTIOVAR_H_ +#define _DEV_PCI_VIRTIOVAR_H_ + +#include <sys/types.h> +#include <sys/queue.h> +#include <sys/param.h> +#include <sys/device.h> +#include <sys/mutex.h> +#include <machine/bus.h> + +#include <dev/pci/virtioreg.h> + +#ifndef VIRTIO_DEBUG +#define VIRTIO_DEBUG 0 +#endif + +struct vq_entry { + SIMPLEQ_ENTRY(vq_entry) qe_list; /* free list */ + uint16_t qe_index; /* index in vq_desc array */ + /* followings are used only when it is the `head' entry */ + int16_t qe_next; /* next enq slot */ + int qe_indirect; /* 1 if using indirect */ + struct vring_desc *qe_desc_base; +}; + +struct virtqueue { + struct virtio_softc *vq_owner; + unsigned int vq_num; /* queue size (# of entries) */ + unsigned int vq_mask; /* (1 << vq_num - 1) */ + int vq_index; /* queue number (0, 1, ...) */ + + /* vring pointers (KVA) */ + struct vring_desc *vq_desc; + struct vring_avail *vq_avail; + struct vring_used *vq_used; + void *vq_indirect; + + /* virtqueue allocation info */ + void *vq_vaddr; + int vq_availoffset; + int vq_usedoffset; + int vq_indirectoffset; + bus_dma_segment_t vq_segs[1]; + unsigned int vq_bytesize; + bus_dmamap_t vq_dmamap; + + int vq_maxsegsize; + int vq_maxnsegs; + + /* free entry management */ + struct vq_entry *vq_entries; + SIMPLEQ_HEAD(, vq_entry) vq_freelist; + struct mutex *vq_freelist_lock; + + /* enqueue/dequeue status */ + uint16_t vq_avail_idx; + uint16_t vq_avail_signalled; + uint16_t vq_used_idx; + int vq_queued; + struct mutex *vq_aring_lock; + struct mutex *vq_uring_lock; + + /* interrupt handler */ + int (*vq_done)(struct virtqueue*); +}; + +struct virtio_feature_name { + uint32_t bit; + const char *name; +}; + +struct virtio_ops { + void (*kick)(struct virtio_softc *, uint16_t); + uint8_t (*read_dev_cfg_1)(struct virtio_softc *, int); + uint16_t (*read_dev_cfg_2)(struct virtio_softc *, int); + uint32_t (*read_dev_cfg_4)(struct virtio_softc *, int); + uint64_t (*read_dev_cfg_8)(struct virtio_softc *, int); + void (*write_dev_cfg_1)(struct virtio_softc *, int, uint8_t); + void (*write_dev_cfg_2)(struct virtio_softc *, int, uint16_t); + void (*write_dev_cfg_4)(struct virtio_softc *, int, uint32_t); + void (*write_dev_cfg_8)(struct virtio_softc *, int, uint64_t); + uint16_t (*read_queue_size)(struct virtio_softc *, uint16_t); + void (*write_queue_addr)(struct virtio_softc *, uint16_t, uint32_t); + void (*set_status)(struct virtio_softc *, int); + uint32_t (*neg_features)(struct virtio_softc *, uint32_t, const struct virtio_feature_name *); + int (*intr)(void *); +}; + +#define VIRTIO_CHILD_ERROR ((void*)1) + +struct virtio_softc { + struct device sc_dev; + bus_dma_tag_t sc_dmat; /* set by transport */ + struct virtio_ops *sc_ops; /* set by transport */ + + int sc_ipl; /* set 
by child */ + void *sc_ih; /* set by transport */ + + uint32_t sc_features; + int sc_indirect; + + int sc_nvqs; /* set by child */ + struct virtqueue *sc_vqs; /* set by child */ + + int sc_childdevid; /* set by transport */ + struct device *sc_child; /* set by child, + * VIRTIO_CHILD_ERROR on error + */ + int (*sc_config_change)(struct virtio_softc*); + /* set by child */ + int (*sc_intrhand)(struct virtio_softc*); + /* set by child */ +}; + +/* public interface */ +#define virtio_read_device_config_1(sc, o) (sc)->sc_ops->read_dev_cfg_1(sc, o) +#define virtio_read_device_config_2(sc, o) (sc)->sc_ops->read_dev_cfg_2(sc, o) +#define virtio_read_device_config_4(sc, o) (sc)->sc_ops->read_dev_cfg_4(sc, o) +#define virtio_read_device_config_8(sc, o) (sc)->sc_ops->read_dev_cfg_8(sc, o) +#define virtio_write_device_config_1(sc, o, v) (sc)->sc_ops->write_dev_cfg_1(sc, o, v) +#define virtio_write_device_config_2(sc, o, v) (sc)->sc_ops->write_dev_cfg_2(sc, o, v) +#define virtio_write_device_config_4(sc, o, v) (sc)->sc_ops->write_dev_cfg_4(sc, o, v) +#define virtio_write_device_config_8(sc, o, v) (sc)->sc_ops->write_dev_cfg_8(sc, o, v) +#define virtio_read_queue_size(sc, i) (sc)->sc_ops->read_queue_size(sc, i) +#define virtio_write_queue_address(sc, i, v) (sc)->sc_ops->write_queue_addr(sc, i, v) +#define virtio_negotiate_features(sc, f, n) (sc)->sc_ops->neg_features(sc, f, n) + +int virtio_alloc_vq(struct virtio_softc*, struct virtqueue*, int, int, int, + const char*); +int virtio_free_vq(struct virtio_softc*, struct virtqueue*); +void virtio_reset(struct virtio_softc *); +void virtio_reinit_start(struct virtio_softc *); +void virtio_reinit_end(struct virtio_softc *); + +int virtio_enqueue_prep(struct virtqueue*, int*); +int virtio_enqueue_reserve(struct virtqueue*, int, int); +int virtio_enqueue(struct virtqueue*, int, bus_dmamap_t, int); +int virtio_enqueue_p(struct virtqueue*, int, bus_dmamap_t, bus_addr_t, + bus_size_t, int); +int virtio_enqueue_commit(struct virtio_softc*, struct virtqueue*, int, int); +#define virtio_notify(sc,vq) virtio_enqueue_commit(sc, vq, -1, 1) + +int virtio_enqueue_abort(struct virtqueue*, int); + +int virtio_dequeue(struct virtio_softc*, struct virtqueue*, int *, int *); +int virtio_dequeue_commit(struct virtqueue*, int); + +int virtio_intr(void *arg); +int virtio_vq_intr(struct virtio_softc *); +void virtio_stop_vq_intr(struct virtio_softc *, struct virtqueue *); +int virtio_start_vq_intr(struct virtio_softc *, struct virtqueue *); + +const char *virtio_device_string(int); +void virtio_log_features(uint32_t, uint32_t, const struct virtio_feature_name *); + +#if VIRTIO_DEBUG +void virtio_vq_dump(struct virtqueue *vq); +#endif +int virtio_nused(struct virtqueue *vq); +int virtio_postpone_intr(struct virtqueue *vq, uint16_t nslots); +int virtio_postpone_intr_smart(struct virtqueue *vq); +int virtio_postpone_intr_far(struct virtqueue *vq); + +#endif /* _DEV_PCI_VIRTIOVAR_H_ */ |
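For orientation, a skeleton of what a child driver sitting on top of this interface looks like, following the "set by child"/"set by transport" contract in struct virtio_softc; "viofoo" and its single queue are entirely hypothetical (see the real vio(4) and vioblk(4) attachments for full versions):

/*
 * Hypothetical "viofoo" child: match on the subsystem device id the
 * transport recorded, then fill in the softc fields the framework
 * expects before allocating a queue.
 */
struct viofoo_softc {
	struct device sc_dev;
	struct virtio_softc *sc_virtio;
	struct virtqueue sc_vq[1];
};

int
viofoo_match(struct device *parent, void *match, void *aux)
{
	struct virtio_softc *va = (struct virtio_softc *)aux;

	return (va->sc_childdevid == PCI_PRODUCT_VIRTIO_ENTROPY);
}

void
viofoo_attach(struct device *parent, struct device *self, void *aux)
{
	struct viofoo_softc *sc = (struct viofoo_softc *)self;
	struct virtio_softc *vsc = (struct virtio_softc *)parent;

	vsc->sc_child = self;		/* claim the device early */
	sc->sc_virtio = vsc;
	vsc->sc_ipl = IPL_BIO;
	vsc->sc_vqs = sc->sc_vq;
	vsc->sc_nvqs = 1;
	vsc->sc_config_change = NULL;
	vsc->sc_intrhand = virtio_vq_intr;

	virtio_negotiate_features(vsc, 0, NULL);
	if (virtio_alloc_vq(vsc, &sc->sc_vq[0], 0, MAXPHYS, 1,
	    "requests") != 0) {
		vsc->sc_child = VIRTIO_CHILD_ERROR;
		return;
	}
	sc->sc_vq[0].vq_done = NULL;	/* install a completion handler */
	printf("\n");
}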