/*	$OpenBSD: virtio.c,v 1.5 2013/03/10 21:58:02 sf Exp $	*/
/*	$NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $	*/

/*
 * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
 * Copyright (c) 2010 Minoura Makoto.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/device.h>
#include <sys/mutex.h>

#include <dev/pci/pcidevs.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <dev/pci/virtioreg.h>
#include <dev/pci/virtiovar.h>

#define MINSEG_INDIRECT		2 /* use indirect if nsegs >= this value */

#if VIRTIO_DEBUG
#define VIRTIO_ASSERT(x)	KASSERT(x)
#else
#define VIRTIO_ASSERT(x)
#endif

void		 virtio_init_vq(struct virtio_softc *,
				struct virtqueue *, int);
void		 vq_free_entry(struct virtqueue *, struct vq_entry *);
void		 vq_free_entry_locked(struct virtqueue *, struct vq_entry *);
struct vq_entry	*vq_alloc_entry(struct virtqueue *);

struct cfdriver virtio_cd = {
	NULL, "virtio", DV_DULL
};

#define virtio_set_status(sc, s) (sc)->sc_ops->set_status(sc, s)
#define virtio_device_reset(sc)	virtio_set_status((sc), 0)

static const char * const virtio_device_name[] = {
	"Unknown (0)",		/* 0 */
	"Network",		/* 1 */
	"Block",		/* 2 */
	"Console",		/* 3 */
	"Entropy",		/* 4 */
	"Memory Balloon",	/* 5 */
	"IO Memory",		/* 6 */
	"Rpmsg",		/* 7 */
	"SCSI host",		/* 8 */
	"9P Transport"		/* 9 */
	"mac80211 wlan"		/* 10 */
};
#define NDEVNAMES	(sizeof(virtio_device_name)/sizeof(char*))

static const struct virtio_feature_name transport_feature_names[] = {
	{ VIRTIO_F_NOTIFY_ON_EMPTY,	"NotifyOnEmpty"},
	{ VIRTIO_F_RING_INDIRECT_DESC,	"RingIndirectDesc"},
	{ VIRTIO_F_RING_EVENT_IDX,	"RingEventIdx"},
	{ VIRTIO_F_BAD_FEATURE,		"BadFeature"},
	{ 0,				NULL}
};

const char *
virtio_device_string(int id)
{
	return id < NDEVNAMES ? virtio_device_name[id] : "Unknown";
}

void
virtio_log_features(uint32_t host, uint32_t neg,
		    const struct virtio_feature_name *guest_feature_names)
{
	const struct virtio_feature_name *namep;
	int i;
	char c;
	uint32_t bit;

	for (i = 0; i < 32; i++) {
		if (i == 30) {
			/*
			 * VIRTIO_F_BAD_FEATURE is only used for
			 * checking correct negotiation
			 */
			continue;
		}
		bit = 1U << i;
		if ((host&bit) == 0)
			continue;
		namep = (i < 24) ? guest_feature_names :
		    transport_feature_names;
		while (namep->bit && namep->bit != bit)
			namep++;
		c = (neg&bit) ? '+' : '-';
		if (namep->name)
			printf(" %c%s", c, namep->name);
		else
			printf(" %cUnknown(%d)", c, i);
	}
}
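
/*
 * Example (sketch): a device driver logs what was negotiated by passing
 * a name table for its device-specific feature bits; the table below is
 * purely illustrative:
 *
 *	static const struct virtio_feature_name example_feature_names[] = {
 *		{ 1<<0,	"SomeFeature" },
 *		{ 0,	NULL }
 *	};
 *	...
 *	virtio_log_features(host_features, negotiated_features,
 *	    example_feature_names);
 */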

/*
 * Reset the device.
 */
/*
 * To reset the device to a known state, do the following:
 *	virtio_reset(sc);	     // this will stop the device activity
 *	<dequeue finished requests>; // virtio_dequeue() still can be called
 *	<revoke pending requests in the vqs if any>;
 *	virtio_reinit_start(sc);     // dequeue prohibited
 *	newfeatures = virtio_negotiate_features(sc, requestedfeatures);
 *	<some other initialization>;
 *	virtio_reinit_end(sc);	     // device activated; enqueue allowed
 * Once attached, feature negotiation is only possible after a virtio_reset.
 */
void
virtio_reset(struct virtio_softc *sc)
{
	virtio_device_reset(sc);
}

void
virtio_reinit_start(struct virtio_softc *sc)
{
	int i;

	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
	for (i = 0; i < sc->sc_nvqs; i++) {
		int n;
		struct virtqueue *vq = &sc->sc_vqs[i];
		n = virtio_read_queue_size(sc, vq->vq_index);
		if (n == 0)	/* vq disappeared */
			continue;
		if (n != vq->vq_num) {
			panic("%s: virtqueue size changed, vq index %d\n",
			      sc->sc_dev.dv_xname, vq->vq_index);
		}
		virtio_init_vq(sc, vq, 1);
		virtio_setup_queue(sc, vq->vq_index,
		    vq->vq_dmamap->dm_segs[0].ds_addr / VIRTIO_PAGE_SIZE);
	}
}

void
virtio_reinit_end(struct virtio_softc *sc)
{
	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
}

/*
 * dmamap sync operations for a virtqueue.
 */
static inline void
vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
	/* availoffset == sizeof(vring_desc)*vq_num */
	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
			ops);
}

static inline void
vq_sync_aring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
			vq->vq_availoffset,
			offsetof(struct vring_avail, ring)
			 + vq->vq_num * sizeof(uint16_t),
			ops);
}

static inline void
vq_sync_uring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
			vq->vq_usedoffset,
			offsetof(struct vring_used, ring)
			 + vq->vq_num * sizeof(struct vring_used_elem),
			ops);
}

static inline void
vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
		     int ops)
{
	int offset = vq->vq_indirectoffset
		      + sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;

	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
			offset, sizeof(struct vring_desc) * vq->vq_maxnsegs,
			ops);
}

/*
 * Can be used as sc_intrhand.
 */
/*
 * Scans the vqs, does bus_dmamap_sync for the rings (not for the
 * payload), and calls (*vq_done)() if some entries have been consumed.
 */
int
virtio_vq_intr(struct virtio_softc *sc)
{
	struct virtqueue *vq;
	int i, r = 0;

	/* going backwards is better for if_vio */
	for (i = sc->sc_nvqs - 1; i >= 0; i--) {
		vq = &sc->sc_vqs[i];
		if (vq->vq_queued) {
			vq->vq_queued = 0;
			vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
		}
		vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
		if (vq->vq_used_idx != vq->vq_used->idx) {
			if (vq->vq_done)
				r |= (vq->vq_done)(vq);
		}
	}

	return r;
}
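
/*
 * Sketch (hypothetical transport glue): the bus front end acknowledges
 * the interrupt and hands off to sc_intrhand, which a driver without
 * special needs can point straight at virtio_vq_intr():
 *
 *	int
 *	example_transport_intr(void *arg)
 *	{
 *		struct virtio_softc *sc = arg;
 *		if (<no interrupt pending for this device>)
 *			return 0;
 *		return virtio_vq_intr(sc);
 *	}
 */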

/*
 * Initialize vq structure.
 */
void
virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq, int reinit)
{
	int i, j;
	int vq_size = vq->vq_num;

	memset(vq->vq_vaddr, 0, vq->vq_bytesize);

	/* build the indirect descriptor chain */
	if (vq->vq_indirect != NULL) {
		struct vring_desc *vd;

		for (i = 0; i < vq_size; i++) {
			vd = vq->vq_indirect;
			vd += vq->vq_maxnsegs * i;
			for (j = 0; j < vq->vq_maxnsegs-1; j++)
				vd[j].next = j + 1;
		}
	}

	/* free slot management */
	SIMPLEQ_INIT(&vq->vq_freelist);
	for (i = 0; i < vq_size; i++) {
		SIMPLEQ_INSERT_TAIL(&vq->vq_freelist,
				    &vq->vq_entries[i], qe_list);
		vq->vq_entries[i].qe_index = i;
	}

	/* enqueue/dequeue status */
	vq->vq_avail_idx = 0;
	vq->vq_avail_signalled = 0xffff;
	vq->vq_used_idx = 0;
	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
	vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
	vq->vq_queued = 1;
}

/*
 * Allocate/free a vq.
 */
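/*
 * The single DMA allocation made below is laid out as follows; each part
 * is padded to VIRTIO_PAGE_SIZE, and the event index fields exist only
 * when VIRTIO_F_RING_EVENT_IDX has been negotiated:
 *
 *	offset 0:		descriptor table (vq_num entries)
 *	vq_availoffset:		avail ring (+ used_event)
 *	vq_usedoffset:		used ring (+ avail_event)
 *	vq_indirectoffset:	indirect descriptor tables
 *				(vq_num * vq_maxnsegs entries), if enabled
 */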
int
virtio_alloc_vq(struct virtio_softc *sc,
		struct virtqueue *vq, int index, int maxsegsize, int maxnsegs,
		const char *name)
{
	int vq_size, allocsize1, allocsize2, allocsize3, allocsize = 0;
	int rsegs, r, hdrlen;
#define VIRTQUEUE_ALIGN(n)	(((n)+(VIRTIO_PAGE_SIZE-1))&	\
				 ~(VIRTIO_PAGE_SIZE-1))

	memset(vq, 0, sizeof(*vq));

	vq_size = virtio_read_queue_size(sc, index);
	if (vq_size == 0) {
		printf("virtqueue not exist, index %d for %s\n", index, name);
		goto err;
	}
	if (((vq_size - 1) & vq_size) != 0)
		panic("vq_size not power of two: %d", vq_size);

	hdrlen = (sc->sc_features & VIRTIO_F_RING_EVENT_IDX) ? 3 : 2;

	/* allocsize1: descriptor table + avail ring + pad */
	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
				     + sizeof(uint16_t) * (hdrlen + vq_size));
	/* allocsize2: used ring + pad */
	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * hdrlen
				     + sizeof(struct vring_used_elem)*vq_size);
	/* allocsize3: indirect table */
	/*
	 * XXX: This is rather inefficient. In practice only a fraction
	 * of this memory will be used.
	 */
	if (sc->sc_indirect && maxnsegs >= MINSEG_INDIRECT)
		allocsize3 = sizeof(struct vring_desc) * maxnsegs * vq_size;
	else
		allocsize3 = 0;
	allocsize = allocsize1 + allocsize2 + allocsize3;

	/* alloc and map the memory */
	r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
			     &vq->vq_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
	if (r != 0) {
		printf("virtqueue %d for %s allocation failed, error %d\n",
		       index, name, r);
		goto err;
	}
	r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], 1, allocsize,
			   (caddr_t*)&vq->vq_vaddr, BUS_DMA_NOWAIT);
	if (r != 0) {
		printf("virtqueue %d for %s map failed, error %d\n",
		       index, name, r);
		goto err;
	}
	r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
			      BUS_DMA_NOWAIT, &vq->vq_dmamap);
	if (r != 0) {
		printf("virtqueue %d for %s dmamap creation failed, error %d\n",
		       index, name, r);
		goto err;
	}
	r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap,
			    vq->vq_vaddr, allocsize, NULL, BUS_DMA_NOWAIT);
	if (r != 0) {
		printf("virtqueue %d for %s dmamap load failed, error %d\n",
		       index, name, r);
		goto err;
	}

	virtio_setup_queue(sc, index,
	    vq->vq_dmamap->dm_segs[0].ds_addr / VIRTIO_PAGE_SIZE);

	/* remember addresses and offsets for later use */
	vq->vq_owner = sc;
	vq->vq_num = vq_size;
	vq->vq_mask = vq_size - 1;
	vq->vq_index = index;
	vq->vq_desc = vq->vq_vaddr;
	vq->vq_availoffset = sizeof(struct vring_desc)*vq_size;
	vq->vq_avail = (struct vring_avail*)(((char*)vq->vq_desc) +
						vq->vq_availoffset);
	vq->vq_usedoffset = allocsize1;
	vq->vq_used = (struct vring_used*)(((char*)vq->vq_desc) +
						vq->vq_usedoffset);
	if (allocsize3 > 0) {
		vq->vq_indirectoffset = allocsize1 + allocsize2;
		vq->vq_indirect = (void*)(((char*)vq->vq_desc)
					  + vq->vq_indirectoffset);
	}
	vq->vq_bytesize = allocsize;
	vq->vq_maxsegsize = maxsegsize;
	vq->vq_maxnsegs = maxnsegs;

	/* free slot management */
	vq->vq_entries = malloc(sizeof(struct vq_entry)*vq_size,
				     M_DEVBUF, M_NOWAIT | M_ZERO);
	if (vq->vq_entries == NULL) {
		r = ENOMEM;
		goto err;
	}

	virtio_init_vq(sc, vq, 0);

#if VIRTIO_DEBUG
	printf("\nallocated %u byte for virtqueue %d for %s, size %d\n",
	       allocsize, index, name, vq_size);
	if (allocsize3 > 0)
		printf("using %d byte (%d entries) indirect descriptors\n",
		       allocsize3, maxnsegs * vq_size);
#endif
	return 0;

err:
	virtio_setup_queue(sc, index, 0);
	if (vq->vq_dmamap)
		bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
	if (vq->vq_vaddr)
		bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
	if (vq->vq_segs[0].ds_addr)
		bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
	memset(vq, 0, sizeof(*vq));

	return -1;
}

int
virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
{
	struct vq_entry *qe;
	int i = 0;

	/* device must be already deactivated */
	/* confirm the vq is empty */
	SIMPLEQ_FOREACH(qe, &vq->vq_freelist, qe_list) {
		i++;
	}
	if (i != vq->vq_num) {
		printf("%s: freeing non-empty vq, index %d\n",
		       sc->sc_dev.dv_xname, vq->vq_index);
		return EBUSY;
	}

	/* tell device that there's no virtqueue any longer */
	virtio_setup_queue(sc, vq->vq_index, 0);

	free(vq->vq_entries, M_DEVBUF);
	bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
	bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
	bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
	bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
	memset(vq, 0, sizeof(*vq));

	return 0;
}
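
/*
 * Example (sketch, hypothetical names): a child driver typically
 * allocates its queues at attach time and sets a completion callback:
 *
 *	if (virtio_alloc_vq(vsc, &sc->sc_vq, 0, MAX_XFER_SIZE,
 *	    MAX_SEGS, "requests") != 0)
 *		goto err;
 *	sc->sc_vq.vq_done = example_vq_done;
 *	...
 *	virtio_free_vq(vsc, &sc->sc_vq);	// on detach, once drained
 */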

/*
 * Free descriptor management.
 */
struct vq_entry *
vq_alloc_entry(struct virtqueue *vq)
{
	struct vq_entry *qe;

	if (SIMPLEQ_EMPTY(&vq->vq_freelist))
		return NULL;
	qe = SIMPLEQ_FIRST(&vq->vq_freelist);
	SIMPLEQ_REMOVE_HEAD(&vq->vq_freelist, qe_list);

	return qe;
}

void
vq_free_entry(struct virtqueue *vq, struct vq_entry *qe)
{
	SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, qe, qe_list);
}

void
vq_free_entry_locked(struct virtqueue *vq, struct vq_entry *qe)
{
	SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, qe, qe_list);
}

/*
 * Enqueue several dmamaps as a single request.
 */
/*
 * Typical usage:
 *  <queue size> number of the following are stored in arrays:
 *  - command blocks (in dmamem) should be pre-allocated and mapped
 *  - dmamaps for command blocks should be pre-allocated and loaded
 *  - dmamaps for payload should be pre-allocated
 *	r = virtio_enqueue_prep(vq, &slot);		// allocate a slot
 *	if (r)		// currently 0 or EAGAIN
 *	  return r;
 *	r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
 *	if (r) {
 *	  virtio_enqueue_abort(vq, slot);
 *	  bus_dmamap_unload(dmat, dmamap_payload[slot]);
 *	  return r;
 *	}
 *	r = virtio_enqueue_reserve(vq, slot,
 *				   dmamap_payload[slot]->dm_nsegs + 1);
 *							// ^ +1 for command
 *	if (r) {	// currently 0 or EAGAIN
 *	  bus_dmamap_unload(dmat, dmamap_payload[slot]);
 *	  return r;					// do not call abort()
 *	}
 *	<setup and prepare commands>
 *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
 *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
 *	virtio_enqueue(vq, slot, dmamap_cmd[slot], 0);
 *	virtio_enqueue(vq, slot, dmamap_payload[slot], iswrite);
 *	virtio_enqueue_commit(sc, vq, slot, 1);
 */

/*
 * enqueue_prep: allocate a slot number
 */
int
virtio_enqueue_prep(struct virtqueue *vq, int *slotp)
{
	struct vq_entry *qe1;

	VIRTIO_ASSERT(slotp != NULL);

	qe1 = vq_alloc_entry(vq);
	if (qe1 == NULL)
		return EAGAIN;
	/* next slot is not allocated yet */
	qe1->qe_next = -1;
	*slotp = qe1->qe_index;

	return 0;
}

/*
 * enqueue_reserve: allocate remaining slots and build the descriptor chain.
 * Calls virtio_enqueue_abort() on failure.
 */
int
virtio_enqueue_reserve(struct virtqueue *vq, int slot, int nsegs)
{
	int indirect;
	struct vq_entry *qe1 = &vq->vq_entries[slot];

	VIRTIO_ASSERT(qe1->qe_next == -1);
	VIRTIO_ASSERT(1 <= nsegs && nsegs <= vq->vq_num);

	if ((vq->vq_indirect != NULL) &&
	    (nsegs >= MINSEG_INDIRECT) &&
	    (nsegs <= vq->vq_maxnsegs))
		indirect = 1;
	else
		indirect = 0;
	qe1->qe_indirect = indirect;

	if (indirect) {
		struct vring_desc *vd;
		int i;

		vd = &vq->vq_desc[qe1->qe_index];
		vd->addr = vq->vq_dmamap->dm_segs[0].ds_addr
			+ vq->vq_indirectoffset;
		vd->addr += sizeof(struct vring_desc)
			* vq->vq_maxnsegs * qe1->qe_index;
		vd->len = sizeof(struct vring_desc) * nsegs;
		vd->flags = VRING_DESC_F_INDIRECT;

		vd = vq->vq_indirect;
		vd += vq->vq_maxnsegs * qe1->qe_index;
		qe1->qe_desc_base = vd;

		for (i = 0; i < nsegs-1; i++) {
			vd[i].flags = VRING_DESC_F_NEXT;
		}
		vd[i].flags = 0;
		qe1->qe_next = 0;

		return 0;
	} else {
		struct vring_desc *vd;
		struct vq_entry *qe;
		int i, s;

		vd = &vq->vq_desc[0];
		qe1->qe_desc_base = vd;
		qe1->qe_next = qe1->qe_index;
		s = slot;
		for (i = 0; i < nsegs - 1; i++) {
			qe = vq_alloc_entry(vq);
			if (qe == NULL) {
				vd[s].flags = 0;
				virtio_enqueue_abort(vq, slot);
				return EAGAIN;
			}
			vd[s].flags = VRING_DESC_F_NEXT;
			vd[s].next = qe->qe_index;
			s = qe->qe_index;
		}
		vd[s].flags = 0;

		return 0;
	}
}

/*
 * enqueue: enqueue a single dmamap.
 */
int
virtio_enqueue(struct virtqueue *vq, int slot, bus_dmamap_t dmamap, int write)
{
	struct vq_entry *qe1 = &vq->vq_entries[slot];
	struct vring_desc *vd = qe1->qe_desc_base;
	int i;
	int s = qe1->qe_next;

	VIRTIO_ASSERT(s >= 0);
	VIRTIO_ASSERT(dmamap->dm_nsegs > 0);
	if (dmamap->dm_nsegs > vq->vq_maxnsegs) {
#if VIRTIO_DEBUG
		for (i = 0; i < dmamap->dm_nsegs; i++) {
			printf(" %d (%d): %#lx %lu\n", i, write,
			    (u_long)dmamap->dm_segs[i].ds_addr,
			    (u_long)dmamap->dm_segs[i].ds_len);
		}
#endif
		panic("dmamap->dm_nsegs %d > vq->vq_maxnsegs %d",
		    dmamap->dm_nsegs, vq->vq_maxnsegs);
	}

	for (i = 0; i < dmamap->dm_nsegs; i++) {
		vd[s].addr = dmamap->dm_segs[i].ds_addr;
		vd[s].len = dmamap->dm_segs[i].ds_len;
		if (!write)
			vd[s].flags |= VRING_DESC_F_WRITE;
		s = vd[s].next;
	}
	qe1->qe_next = s;

	return 0;
}

int
virtio_enqueue_p(struct virtqueue *vq, int slot, bus_dmamap_t dmamap,
		 bus_addr_t start, bus_size_t len, int write)
{
	struct vq_entry *qe1 = &vq->vq_entries[slot];
	struct vring_desc *vd = qe1->qe_desc_base;
	int s = qe1->qe_next;

	VIRTIO_ASSERT(s >= 0);
	/* XXX todo: handle more segments */
	VIRTIO_ASSERT(dmamap->dm_nsegs == 1);
	VIRTIO_ASSERT((dmamap->dm_segs[0].ds_len > start) &&
	    (dmamap->dm_segs[0].ds_len >= start + len));

	vd[s].addr = dmamap->dm_segs[0].ds_addr + start;
	vd[s].len = len;
	if (!write)
		vd[s].flags |= VRING_DESC_F_WRITE;
	qe1->qe_next = vd[s].next;

	return 0;
}

static void
publish_avail_idx(struct virtio_softc *sc, struct virtqueue *vq)
{
	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
	vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
	vq->vq_avail->idx = vq->vq_avail_idx;
	vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
	vq->vq_queued = 1;
	vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
}

/*
 * enqueue_commit: add it to the aring.
 */
int
virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq,
		      int slot, int notifynow)
{
	struct vq_entry *qe1;

	if (slot < 0)
		goto notify;
	vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
	qe1 = &vq->vq_entries[slot];
	if (qe1->qe_indirect)
		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
	vq->vq_avail->ring[(vq->vq_avail_idx++) & vq->vq_mask] = slot;

notify:
	if (notifynow) {
		if (vq->vq_owner->sc_features & VIRTIO_F_RING_EVENT_IDX) {
			uint16_t o = vq->vq_avail_signalled;
			uint16_t n = vq->vq_avail_idx;
			uint16_t t = VQ_AVAIL_EVENT(vq) + 1;
			publish_avail_idx(sc, vq);
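			/*
			 * Notify only if the event index t lies in the
			 * half-open window (o, n] modulo 2^16, i.e. the
			 * device's avail event point was crossed since
			 * we last signalled at o.
			 */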
			if ((o < n && o < t && t <= n)
			    || (o > n && (o < t || t <= n))) {
				sc->sc_ops->kick(sc, vq->vq_index);
				vq->vq_avail_signalled = n;
			}
		} else {
			publish_avail_idx(sc, vq);
			if (!(vq->vq_used->flags & VRING_USED_F_NO_NOTIFY))
				sc->sc_ops->kick(sc, vq->vq_index);
		}
	}

	return 0;
}

/*
 * enqueue_abort: rollback.
 */
int
virtio_enqueue_abort(struct virtqueue *vq, int slot)
{
	struct vq_entry *qe = &vq->vq_entries[slot];
	struct vring_desc *vd;
	int s;

	if (qe->qe_next < 0) {
		vq_free_entry(vq, qe);
		return 0;
	}

	s = slot;
	vd = &vq->vq_desc[0];
	while (vd[s].flags & VRING_DESC_F_NEXT) {
		s = vd[s].next;
		vq_free_entry_locked(vq, qe);
		qe = &vq->vq_entries[s];
	}
	vq_free_entry_locked(vq, qe);
	return 0;
}

/*
 * Dequeue a request.
 */
/*
 * dequeue: dequeue a request from uring; dmamap_sync for uring is
 *	    already done in the interrupt handler.
 */
int
virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
	       int *slotp, int *lenp)
{
	uint16_t slot, usedidx;
	struct vq_entry *qe;

	if (vq->vq_used_idx == vq->vq_used->idx)
		return ENOENT;
	usedidx = vq->vq_used_idx++;
	usedidx &= vq->vq_mask;
	slot = vq->vq_used->ring[usedidx].id;
	qe = &vq->vq_entries[slot];

	if (qe->qe_indirect)
		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);

	if (slotp)
		*slotp = slot;
	if (lenp)
		*lenp = vq->vq_used->ring[usedidx].len;

	return 0;
}
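
/*
 * Example (sketch): a typical vq_done callback drains the used ring and
 * recycles the slots; names are hypothetical:
 *
 *	int
 *	example_vq_done(struct virtqueue *vq)
 *	{
 *		struct virtio_softc *vsc = vq->vq_owner;
 *		int slot, len, r = 0;
 *
 *		while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
 *			<sync and complete the request in slot>;
 *			virtio_dequeue_commit(vq, slot);
 *			r = 1;
 *		}
 *		return r;
 *	}
 */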

/*
 * dequeue_commit: complete dequeue; the slot is recycled for future use.
 *                 If you forget to call this, the slot will be leaked.
 */
int
virtio_dequeue_commit(struct virtqueue *vq, int slot)
{
	struct vq_entry *qe = &vq->vq_entries[slot];
	struct vring_desc *vd = &vq->vq_desc[0];
	int s = slot;

	while (vd[s].flags & VRING_DESC_F_NEXT) {
		s = vd[s].next;
		vq_free_entry_locked(vq, qe);
		qe = &vq->vq_entries[s];
	}
	vq_free_entry_locked(vq, qe);

	return 0;
}

/*
 * Increase the event index in order to delay interrupts.
 * Returns 0 on success; returns 1 if the used ring has already advanced
 * too far, and the caller must process the queue again (otherwise, no
 * more interrupts will happen).
 */
int
virtio_postpone_intr(struct virtqueue *vq, uint16_t nslots)
{
	uint16_t	idx;

	idx = vq->vq_used_idx + nslots;

	/* set the new event index: avail_ring->used_event = idx */
	VQ_USED_EVENT(vq) = idx;

	vq_sync_aring(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
	vq->vq_queued++;

	if (nslots < virtio_nused(vq))
		return 1;

	return 0;
}
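
/*
 * Sketch of the caller side: the device may consume descriptors while
 * the event index is being updated, so a return value of 1 means more
 * work is already pending:
 *
 *	do {
 *		<process all pending used entries>;
 *	} while (virtio_postpone_intr_smart(vq));
 */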

/*
 * Postpone interrupt until 3/4 of the available descriptors have been
 * consumed.
 */
int
virtio_postpone_intr_smart(struct virtqueue *vq)
{
	uint16_t	nslots;

	nslots = (uint16_t)(vq->vq_avail->idx - vq->vq_used_idx) * 3 / 4;

	return virtio_postpone_intr(vq, nslots);
}

/*
 * Postpone interrupt until all of the available descriptors have been
 * consumed.
 */
int
virtio_postpone_intr_far(struct virtqueue *vq)
{
	uint16_t	nslots;

	nslots = (uint16_t)(vq->vq_avail->idx - vq->vq_used_idx);

	return virtio_postpone_intr(vq, nslots);
}


/*
 * Start/stop vq interrupt.  No guarantee.
 */
void
virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
{
	if ((sc->sc_features & VIRTIO_F_RING_EVENT_IDX)) {
		/*
		 * No way to disable the interrupt completely with
		 * RingEventIdx. Instead advance used_event by half
		 * the possible value. That event index won't be
		 * reached soon, and is far enough in the past to not
		 * trigger a spurious interrupt.
		 */
		VQ_USED_EVENT(vq) = vq->vq_used_idx + 0x8000;
	} else {
		vq->vq_avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
	}
	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
	vq->vq_queued++;
}

int
virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
{
	/*
	 * If event index feature is negotiated, enabling
	 * interrupts is done through setting the latest
	 * consumed index in the used_event field
	 */
	if (sc->sc_features & VIRTIO_F_RING_EVENT_IDX)
		VQ_USED_EVENT(vq) = vq->vq_used_idx;
	else
		vq->vq_avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;

	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
	vq->vq_queued++;

	if (vq->vq_used_idx != vq->vq_used->idx)
		return 1;

	return 0;
}
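
/*
 * One possible caller pattern (sketch): entries may have been consumed
 * between the last check and re-enabling the interrupt, so loop until
 * the used ring is quiet:
 *
 *	while (virtio_start_vq_intr(vsc, vq))
 *		<process pending used entries>;
 */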

/*
 * Returns the number of slots in the used ring that the device has
 * filled in but the driver has not yet dequeued.
 */
int
virtio_nused(struct virtqueue *vq)
{
	uint16_t	n;

	n = (uint16_t)(vq->vq_used->idx - vq->vq_used_idx);
	VIRTIO_ASSERT(n <= vq->vq_num);

	return n;
}

#if VIRTIO_DEBUG
void
virtio_vq_dump(struct virtqueue *vq)
{
	/* Common fields */
	printf(" + vq num: %d\n", vq->vq_num);
	printf(" + vq mask: 0x%X\n", vq->vq_mask);
	printf(" + vq index: %d\n", vq->vq_index);
	printf(" + vq used idx: %d\n", vq->vq_used_idx);
	printf(" + vq avail idx: %d\n", vq->vq_avail_idx);
	printf(" + vq queued: %d\n",vq->vq_queued);
	/* Avail ring fields */
	printf(" + avail flags: 0x%X\n", vq->vq_avail->flags);
	printf(" + avail idx: %d\n", vq->vq_avail->idx);
	printf(" + avail event: %d\n", VQ_AVAIL_EVENT(vq));
	/* Used ring fields */
	printf(" + used flags: 0x%X\n",vq->vq_used->flags);
	printf(" + used idx: %d\n",vq->vq_used->idx);
	printf(" + used event: %d\n", VQ_USED_EVENT(vq));
	printf(" +++++++++++++++++++++++++++\n");
}
#endif