diff options
author | Owain Ainsworth <oga@cvs.openbsd.org> | 2009-06-06 06:02:45 +0000 |
---|---|---|
committer | Owain Ainsworth <oga@cvs.openbsd.org> | 2009-06-06 06:02:45 +0000 |
commit | ec5925b89b62e83010dd33bc1ad8c5ab48d6afd5 (patch) | |
tree | 179ac9a6745a8c827f87317b737aa98131af64cf | |
parent | 8d0733a46ab21b9a74bb53e474cab34e0e458771 (diff) |
Add an interface to agp to create a bus_dma_tag over a range of the
aperture. Loading memory through this tag will take your memory, bind it
to agp, and return you the aperture address. It's essentially the same as
iommu on amd64 in the way it works.
This will be used by the upcoming drm memory management code for intel
igp chipsets (which works, but is slow and will not be enabled at first).
Right now the sync function for intagp is really slow (it does a wbinvd()
on every sync). This is in the process of getting fixed, but the size of
the diffs in my trees was getting silly.
-rw-r--r-- | sys/arch/amd64/pci/agp_machdep.c | 181 | ||||
-rw-r--r-- | sys/arch/i386/pci/agp_machdep.c | 180 | ||||
-rw-r--r-- | sys/dev/pci/agp_i810.c | 30 | ||||
-rw-r--r-- | sys/dev/pci/agpvar.h | 9 |
4 files changed, 392 insertions, 8 deletions
diff --git a/sys/arch/amd64/pci/agp_machdep.c b/sys/arch/amd64/pci/agp_machdep.c index 6237588010d..ef7391845ba 100644 --- a/sys/arch/amd64/pci/agp_machdep.c +++ b/sys/arch/amd64/pci/agp_machdep.c @@ -1,6 +1,21 @@ -/* $OpenBSD: agp_machdep.c,v 1.3 2007/11/25 17:11:12 oga Exp $ */ +/* $OpenBSD: agp_machdep.c,v 1.4 2009/06/06 06:02:44 oga Exp $ */ /* + * Copyright (c) 2008 - 2009 Owain G. Ainsworth <oga@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* * Copyright (c) 2002 Michael Shalayeff * All rights reserved. * @@ -27,7 +42,9 @@ */ #include <sys/param.h> +#include <sys/systm.h> #include <sys/device.h> +#include <sys/malloc.h> #include <dev/pci/pcivar.h> #include <dev/pci/pcireg.h> @@ -35,9 +52,171 @@ #include <dev/pci/agpvar.h> #include <machine/cpufunc.h> +#include <machine/bus.h> + +#include "intagp.h" + +/* bus_dma functions */ + +#if NINTAGP > 0 +void intagp_dma_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t, + bus_size_t, int); +#endif void agp_flush_cache(void) { wbinvd(); } + +/* + * functions for bus_dma used by drm for GEM + * + * We use the sg_dma backend (also used by iommu) to provide the actual + * implementation, so all we need provide is the magic to create the tag, and + * the appropriate callbacks. 
+ * + * We give the backend drivers a chance to honour the bus_dma flags, some of + * these may be used, for example to provide snooped mappings (intagp). + * For intagp at least, we honour the BUS_DMA_COHERENT flag, though it is not + * used often, and is * technically to be used for dmamem_map, we use it for + * dmamap_load since adding coherency involes flags to the gtt pagetables. + * We only use it for very special circumstances since when a GTT mapping is + * set to coherent, the cpu can't read or write through the gtt aperture. + * + * Currently, since the userland agp driver still needs to access the gart, we + * only do bus_dma for a section that we've been told is ours, hence the need + * for the init function at present. + */ + +int +agp_bus_dma_init(struct agp_softc *sc, bus_addr_t start, bus_addr_t end, + bus_dma_tag_t *dmat) +{ + struct bus_dma_tag *tag; + struct sg_cookie *cookie; + + /* + * XXX add agp map into the main queue that takes up our chunk of + * GTT space to prevent the userland api stealing any of it. 
+ */ + if ((tag = malloc(sizeof(*tag), M_DEVBUF, + M_WAITOK | M_CANFAIL)) == NULL) + return (ENOMEM); + + if ((cookie = sg_dmatag_init("agpgtt", sc->sc_chipc, start, end - start, + sc->sc_methods->bind_page, sc->sc_methods->unbind_page, + sc->sc_methods->flush_tlb)) == NULL) { + free(tag, M_DEVBUF); + return (ENOMEM); + } + + tag->_cookie = cookie; + tag->_dmamap_create = sg_dmamap_create; + tag->_dmamap_destroy = sg_dmamap_destroy; + tag->_dmamap_load = sg_dmamap_load; + tag->_dmamap_load_mbuf = sg_dmamap_load_mbuf; + tag->_dmamap_load_uio = sg_dmamap_load_uio; + tag->_dmamap_load_raw = sg_dmamap_load_raw; + tag->_dmamap_unload = sg_dmamap_unload; + tag->_dmamem_alloc = sg_dmamem_alloc; + tag->_dmamem_free = _bus_dmamem_free; + tag->_dmamem_map = _bus_dmamem_map; + tag->_dmamem_unmap = _bus_dmamem_unmap; + tag->_dmamem_mmap = _bus_dmamem_mmap; + + /* Driver may need special sync handling */ + if (sc->sc_methods->dma_sync != NULL) { + tag->_dmamap_sync = sc->sc_methods->dma_sync; + } else { + tag->_dmamap_sync = _bus_dmamap_sync; + } + + *dmat = tag; + return (0); +} + +void +agp_bus_dma_destroy(struct agp_softc *sc, bus_dma_tag_t dmat) +{ + struct sg_cookie *cookie = dmat->_cookie; + + + /* + * XXX clear up blocker queue + */ + + sg_dmatag_destroy(cookie); + free(dmat, M_DEVBUF); +} + +void +agp_bus_dma_set_alignment(bus_dma_tag_t tag, bus_dmamap_t dmam, + u_long alignment) +{ + sg_dmamap_set_alignment(tag, dmam, alignment); +} + + +/* + * ick ick ick. However, the rest of this driver is supposedly MI (though + * they only exist on x86), so this can't be in dev/pci. + */ + +#if NINTAGP > 0 + +/* + * bus_dmamap_sync routine for intagp. + * + * This is tailored to the usage that drm with the GEM memory manager + * will be using, since intagp is for intel IGD, and thus shouldn't be + * used for anything other than gpu-based work. 
Essentially for the intel GEM + * driver we use bus_dma as an abstraction to convert our memory into a gtt + * address and deal with any cache incoherencies that we create. + * + * We use the cflush instruction to deal with clearing the caches, since our + * cache is physically indexed, we can even map then clear the page and it'll + * work. on i386 we need to check for the presence of cflush() in cpuid, + * however, all cpus that have a new enough intel GMCH should be suitable. + */ +void +intagp_dma_sync(bus_dma_tag_t tag, bus_dmamap_t dmam, + bus_addr_t offset, bus_size_t size, int ops) +{ +#ifdef DIAGNOSTIC + if ((ops & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) != 0 && + (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE)) != 0) + panic("agp_dmamap_sync: mix PRE and POST"); + if (offset >= dmam->dm_mapsize) + panic("_intagp_dma_sync: bad offset %lu (size = %lu)", + offset, dmam->dm_mapsize); + if (size == 0 || (offset + size) > dmam->dm_mapsize) + panic("intagp_dma_sync: bad length"); +#endif /* DIAGNOSTIC */ + + /* Coherent mappings need no sync. */ + if (dmam->_dm_flags & BUS_DMA_COHERENT) + return; + + /* + * We need to clflush the object cache in all cases but postwrite. + * + * - Due to gpu incoherency, postread we need to flush speculative + * reads (which are not written back on intel cpus). + * + * - preread we need to flush data which will very soon be stale from + * the caches + * + * - prewrite we need to make sure our data hits the memory before the + * gpu hoovers it up. + * + * The chipset also may need flushing, but that fits badly into + * bus_dma and it done in the driver. 
+ */ + if (ops & BUS_DMASYNC_POSTREAD || ops & BUS_DMASYNC_PREREAD || + ops & BUS_DMASYNC_PREWRITE) { + /* XXX use clflush */ + wbinvd(); + } +} +#endif diff --git a/sys/arch/i386/pci/agp_machdep.c b/sys/arch/i386/pci/agp_machdep.c index 507428a94d2..87bedfc6cd7 100644 --- a/sys/arch/i386/pci/agp_machdep.c +++ b/sys/arch/i386/pci/agp_machdep.c @@ -1,6 +1,21 @@ -/* $OpenBSD: agp_machdep.c,v 1.9 2007/11/25 17:11:12 oga Exp $ */ +/* $OpenBSD: agp_machdep.c,v 1.10 2009/06/06 06:02:44 oga Exp $ */ /* + * Copyright (c) 2008 - 2009 Owain G. Ainsworth <oga@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* * Copyright (c) 2002 Michael Shalayeff * All rights reserved. 
* @@ -27,7 +42,9 @@ */ #include <sys/param.h> +#include <sys/systm.h> #include <sys/device.h> +#include <sys/malloc.h> #include <dev/pci/pcivar.h> #include <dev/pci/pcireg.h> @@ -35,9 +52,170 @@ #include <dev/pci/agpvar.h> #include <machine/cpufunc.h> +#include <machine/bus.h> + +#include "intagp.h" + +/* bus_dma functions */ + +#if NINTAGP > 0 +void intagp_dma_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t, + bus_size_t, int); +#endif void agp_flush_cache(void) { wbinvd(); } + +/* + * functions for bus_dma used by drm for GEM + * + * We use the sg_dma backend (also used by iommu) to provide the actual + * implementation, so all we need provide is the magic to create the tag, and + * the appropriate callbacks. + * + * We give the backend drivers a chance to honour the bus_dma flags, some of + * these may be used, for example to provide snooped mappings (intagp). + * For intagp at least, we honour the BUS_DMA_COHERENT flag, though it is not + * used often, and is * technically to be used for dmamem_map, we use it for + * dmamap_load since adding coherency involes flags to the gtt pagetables. + * We only use it for very special circumstances since when a GTT mapping is + * set to coherent, the cpu can't read or write through the gtt aperture. + * + * Currently, since the userland agp driver still needs to access the gart, we + * only do bus_dma for a section that we've been told is ours, hence the need + * for the init function at present. + */ + +int +agp_bus_dma_init(struct agp_softc *sc, bus_addr_t start, bus_addr_t end, + bus_dma_tag_t *dmat) +{ + struct bus_dma_tag *tag; + struct sg_cookie *cookie; + + /* + * XXX add agp map into the main queue that takes up our chunk of + * GTT space to prevent the userland api stealing any of it. 
+ */ + if ((tag = malloc(sizeof(*tag), M_DEVBUF, + M_WAITOK | M_CANFAIL)) == NULL) + return (ENOMEM); + + if ((cookie = sg_dmatag_init("agpgtt", sc->sc_chipc, start, end - start, + sc->sc_methods->bind_page, sc->sc_methods->unbind_page, + sc->sc_methods->flush_tlb)) == NULL) { + free(tag, M_DEVBUF); + return (ENOMEM); + } + + tag->_cookie = cookie; + tag->_dmamap_create = sg_dmamap_create; + tag->_dmamap_destroy = sg_dmamap_destroy; + tag->_dmamap_load = sg_dmamap_load; + tag->_dmamap_load_mbuf = sg_dmamap_load_mbuf; + tag->_dmamap_load_uio = sg_dmamap_load_uio; + tag->_dmamap_load_raw = sg_dmamap_load_raw; + tag->_dmamap_unload = sg_dmamap_unload; + tag->_dmamem_alloc = sg_dmamem_alloc; + tag->_dmamem_free = _bus_dmamem_free; + tag->_dmamem_map = _bus_dmamem_map; + tag->_dmamem_unmap = _bus_dmamem_unmap; + tag->_dmamem_mmap = _bus_dmamem_mmap; + + /* Driver may need special sync handling */ + if (sc->sc_methods->dma_sync != NULL) { + tag->_dmamap_sync = sc->sc_methods->dma_sync; + } else { + tag->_dmamap_sync = NULL; + } + + *dmat = tag; + return (0); +} + +void +agp_bus_dma_destroy(struct agp_softc *sc, bus_dma_tag_t dmat) +{ + struct sg_cookie *cookie = dmat->_cookie; + + + /* + * XXX clear up blocker queue + */ + + sg_dmatag_destroy(cookie); + free(dmat, M_DEVBUF); +} + +void +agp_bus_dma_set_alignment(bus_dma_tag_t tag, bus_dmamap_t dmam, + u_long alignment) +{ + sg_dmamap_set_alignment(tag, dmam, alignment); +} + + +/* + * ick ick ick. However, the rest of this driver is supposedly MI (though + * they only exist on x86), so this can't be in dev/pci. + */ + +#if NINTAGP > 0 + +/* + * bus_dmamap_sync routine for intagp. + * + * This is tailored to the usage that drm with the GEM memory manager + * will be using, since intagp is for intel IGD, and thus shouldn't be + * used for anything other than gpu-based work. 
Essentially for the intel GEM + * driver we use bus_dma as an abstraction to convert our memory into a gtt + * address and deal with any cache incoherencies that we create. + * + * We use the cflush instruction to deal with clearing the caches, since our + * cache is physically indexed, we can even map then clear the page and it'll + * work. on i386 we need to check for the presence of cflush() in cpuid, + * however, all cpus that have a new enough intel GMCH should be suitable. + */ +void +intagp_dma_sync(bus_dma_tag_t tag, bus_dmamap_t dmam, + bus_addr_t offset, bus_size_t size, int ops) +{ +#ifdef DIAGNOSTIC + if ((ops & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) != 0 && + (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE)) != 0) + panic("agp_dmamap_sync: mix PRE and POST"); + if (offset >= dmam->dm_mapsize) + panic("_intagp_dma_sync: bad offset %lu (size = %lu)", + offset, dmam->dm_mapsize); + if (size == 0 || (offset + size) > dmam->dm_mapsize) + panic("intagp_dma_sync: bad length"); +#endif /* DIAGNOSTIC */ + + /* Coherent mappings need no sync. */ + if (dmam->_dm_flags & BUS_DMA_COHERENT) + return; + + /* + * We need to clflush the object cache in all cases but postwrite. + * + * - Due to gpu incoherency, postread we need to flush speculative + * reads (which are not written back on intel cpus). + * + * - preread we need to flush data which will very soon be stale from + * the caches + * + * - prewrite we need to make sure our data hits the memory before the + * gpu hoovers it up. + * + * The chipset also may need flushing, but that fits badly into + * bus_dma and it done in the driver. 
+ */ + if (ops & BUS_DMASYNC_POSTREAD || ops & BUS_DMASYNC_PREREAD || + ops & BUS_DMASYNC_PREWRITE) { + wbinvd(); + } +} +#endif diff --git a/sys/dev/pci/agp_i810.c b/sys/dev/pci/agp_i810.c index 93e87cb7ff0..7ed939c079e 100644 --- a/sys/dev/pci/agp_i810.c +++ b/sys/dev/pci/agp_i810.c @@ -1,4 +1,4 @@ -/* $OpenBSD: agp_i810.c,v 1.54 2009/05/24 02:06:15 oga Exp $ */ +/* $OpenBSD: agp_i810.c,v 1.55 2009/06/06 06:02:44 oga Exp $ */ /*- * Copyright (c) 2000 Doug Rabson @@ -74,10 +74,12 @@ enum { struct agp_i810_softc { struct device dev; + bus_dma_segment_t scrib_seg; struct agp_softc *agpdev; struct agp_gatt *gatt; struct vga_pci_bar *map; struct vga_pci_bar *gtt_map; + bus_dmamap_t scrib_dmamap; bus_addr_t isc_apaddr; bus_size_t isc_apsize; /* current aperture size */ int chiptype; /* i810-like or i830 */ @@ -100,6 +102,9 @@ int agp_i810_unbind_memory(void *, struct agp_memory *); void intagp_write_gtt(struct agp_i810_softc *, bus_size_t, paddr_t); int intagp_gmch_match(struct pci_attach_args *); +extern void intagp_dma_sync(bus_dma_tag_t, bus_dmamap_t, + bus_addr_t, bus_size_t, int); + struct cfattach intagp_ca = { sizeof(struct agp_i810_softc), agp_i810_probe, agp_i810_attach }; @@ -112,6 +117,7 @@ struct agp_methods agp_i810_methods = { agp_i810_bind_page, agp_i810_unbind_page, agp_i810_flush_tlb, + intagp_dma_sync, agp_i810_enable, agp_i810_alloc_memory, agp_i810_free_memory, @@ -211,7 +217,7 @@ agp_i810_attach(struct device *parent, struct device *self, void *aux) struct agp_gatt *gatt; struct pci_attach_args *pa = aux, bpa; struct vga_pci_softc *vga = (struct vga_pci_softc *)parent; - bus_addr_t mmaddr, gmaddr; + bus_addr_t mmaddr, gmaddr, tmp; pcireg_t memtype, reg; u_int32_t stolen; u_int16_t gcc1; @@ -461,6 +467,18 @@ agp_i810_attach(struct device *parent, struct device *self, void *aux) /* Install the GATT. 
*/ WRITE4(AGP_I810_PGTBL_CTL, gatt->ag_physical | 1); + /* Intel recommends that you have a fake page bound to the gtt always */ + if (agp_alloc_dmamem(pa->pa_dmat, AGP_PAGE_SIZE, &isc->scrib_dmamap, + &tmp, &isc->scrib_seg) != 0) { + printf(": can't get scribble page\n"); + return; + } + + /* initialise all gtt entries to point to scribble page */ + for (tmp = isc->isc_apaddr; tmp < (isc->isc_apaddr + isc->isc_apsize); + tmp += AGP_PAGE_SIZE) + agp_i810_unbind_page(isc, tmp); + /* * Make sure the chipset can see everything. */ @@ -535,7 +553,8 @@ agp_i810_unbind_page(void *sc, bus_size_t offset) { struct agp_i810_softc *isc = sc; - intagp_write_gtt(isc, offset - isc->isc_apaddr, 0); + intagp_write_gtt(isc, offset - isc->isc_apaddr, + isc->scrib_dmamap->dm_segs[0].ds_addr); } /* @@ -676,7 +695,8 @@ agp_i810_bind_memory(void *sc, struct agp_memory *mem, bus_size_t offset) if (mem->am_type == 2) { for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE) - intagp_write_gtt(isc, offset + i, mem->am_physical + i); + agp_i810_bind_page(isc, offset + i, + mem->am_physical + i, 0); mem->am_offset = offset; mem->am_is_bound = 1; return (0); @@ -705,7 +725,7 @@ agp_i810_unbind_memory(void *sc, struct agp_memory *mem) if (mem->am_type == 2) { for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE) - intagp_write_gtt(isc, mem->am_offset + i, 0); + agp_i810_unbind_page(isc, mem->am_offset + i); mem->am_offset = 0; mem->am_is_bound = 0; return (0); diff --git a/sys/dev/pci/agpvar.h b/sys/dev/pci/agpvar.h index 9ea6d3ed386..5bc6a6ee5fe 100644 --- a/sys/dev/pci/agpvar.h +++ b/sys/dev/pci/agpvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: agpvar.h,v 1.19 2009/05/10 16:57:44 oga Exp $ */ +/* $OpenBSD: agpvar.h,v 1.20 2009/06/06 06:02:44 oga Exp $ */ /* $NetBSD: agpvar.h,v 1.4 2001/10/01 21:54:48 fvdl Exp $ */ /*- @@ -102,6 +102,8 @@ struct agp_methods { void (*bind_page)(void *, bus_addr_t, paddr_t, int); void (*unbind_page)(void *, bus_addr_t); void (*flush_tlb)(void *); + void (*dma_sync)(bus_dma_tag_t, 
bus_dmamap_t, bus_addr_t, + bus_size_t, int); int (*enable)(void *, u_int32_t mode); struct agp_memory * (*alloc_memory)(void *, int, vsize_t); @@ -168,6 +170,11 @@ int agpdev_print(void *, const char *); int agpbus_probe(struct agp_attach_args *aa); +int agp_bus_dma_init(struct agp_softc *, bus_addr_t, bus_addr_t, + bus_dma_tag_t *); +void agp_bus_dma_destroy(struct agp_softc *, bus_dma_tag_t); +void agp_bus_dma_set_alignment(bus_dma_tag_t, bus_dmamap_t, + u_long); /* * Kernel API */ |