author    Mark Kettenis <kettenis@cvs.openbsd.org>    2020-10-25 14:11:17 +0000
committer Mark Kettenis <kettenis@cvs.openbsd.org>    2020-10-25 14:11:17 +0000
commit    f31ce9862ffec990cd4bffb94c4b0548744421b2 (patch)
tree      3ccf7b5a7944cc75cbc6863591ab837ccdce89b5 /sys/arch
parent    764dfc69bcdc55ff4651c8f773a22a752019bec1 (diff)
Implement a workaround for PCI devices that can't address the full 64-bit
PCI address space.  Needed for radeondrm(4) and amdgpu(4) since Radeon GPUs
only implement 36, 40 or 44 bits of address space.
Diffstat (limited to 'sys/arch')
-rw-r--r--  sys/arch/powerpc64/dev/phb.c | 180
1 file changed, 173 insertions(+), 7 deletions(-)
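
In outline, the workaround below chooses between two PCI bus addresses for
each DMA segment: with a TCE table, the physical address shifted up by a
fixed 4 GB window offset; without one, the physical address with the IODA
TVE-select bit set to request bypass.  A minimal standalone sketch of that
arithmetic, using the constants from the diff (phb_dva() is a hypothetical
helper, not part of the driver):

#include <stdint.h>

#define IODA_TVE_SELECT	(1ULL << 59)	/* request no-translate (bypass) */
#define PHB_DMA_OFFSET	(1ULL << 32)	/* 1:1 TCE window starts at 4 GB */

/*
 * Hypothetical helper mirroring the segment fixup in
 * phb_dmamap_load_buffer()/phb_dmamap_load_raw() below.
 */
static inline uint64_t
phb_dva(uint64_t pa, int have_tce_table)
{
	if (have_tce_table)
		return pa + PHB_DMA_OFFSET;	/* small enough for devices
						   with fewer address lines */
	return pa | IODA_TVE_SELECT;		/* needs address bit 59 */
}
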
diff --git a/sys/arch/powerpc64/dev/phb.c b/sys/arch/powerpc64/dev/phb.c
index e09482ec476..2dc19dc37d3 100644
--- a/sys/arch/powerpc64/dev/phb.c
+++ b/sys/arch/powerpc64/dev/phb.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: phb.c,v 1.17 2020/10/24 14:38:46 kettenis Exp $ */
+/* $OpenBSD: phb.c,v 1.18 2020/10/25 14:11:16 kettenis Exp $ */
/*
* Copyright (c) 2020 Mark Kettenis <kettenis@openbsd.org>
*
@@ -35,6 +35,15 @@ extern paddr_t physmax; /* machdep.c */
#define IODA_TVE_SELECT (1ULL << 59)
+#define IODA_TCE_TABLE_SIZE_MAX (1ULL << 42)
+
+#define IODA_TCE_READ (1ULL << 0)
+#define IODA_TCE_WRITE (1ULL << 1)
+
+#define PHB_DMA_OFFSET (1ULL << 32)
+
+struct phb_dmamem;
+
struct phb_range {
uint32_t flags;
uint64_t pci_base;
@@ -57,6 +66,7 @@ struct phb_softc {
uint64_t sc_phb_id;
uint64_t sc_pe_number;
+ struct phb_dmamem *sc_tce_table;
uint32_t sc_msi_ranges[2];
uint32_t sc_xive;
@@ -71,6 +81,22 @@ struct phb_softc {
int sc_bus;
};
+struct phb_dmamem {
+ bus_dmamap_t pdm_map;
+ bus_dma_segment_t pdm_seg;
+ size_t pdm_size;
+ caddr_t pdm_kva;
+};
+
+#define PHB_DMA_MAP(_pdm) ((_pdm)->pdm_map)
+#define PHB_DMA_LEN(_pdm) ((_pdm)->pdm_size)
+#define PHB_DMA_DVA(_pdm) ((_pdm)->pdm_map->dm_segs[0].ds_addr)
+#define PHB_DMA_KVA(_pdm) ((void *)(_pdm)->pdm_kva)
+
+struct phb_dmamem *phb_dmamem_alloc(bus_dma_tag_t, bus_size_t,
+ bus_size_t);
+void phb_dmamem_free(bus_dma_tag_t, struct phb_dmamem *);
+
int phb_match(struct device *, void *, void *);
void phb_attach(struct device *, struct device *, void *);
@@ -82,6 +108,8 @@ struct cfdriver phb_cd = {
NULL, "phb", DV_DULL
};
+void phb_setup_tce_table(struct phb_softc *sc);
+
void phb_attach_hook(struct device *, struct device *,
struct pcibus_attach_args *);
int phb_bus_maxdevs(void *, int);
@@ -200,8 +228,7 @@ phb_attach(struct device *parent, struct device *self, void *aux)
}
ranges = malloc(rangeslen, M_TEMP, M_WAITOK);
- OF_getpropintarray(sc->sc_node, "ranges", ranges,
- rangeslen);
+ OF_getpropintarray(sc->sc_node, "ranges", ranges, rangeslen);
/*
* Reserve an extra slot here and make sure it is filled
@@ -318,6 +345,24 @@ phb_attach(struct device *parent, struct device *self, void *aux)
printf("\n");
+ /*
+ * The DMA controllers of many PCI devices developed for the
+ * x86 architectures may not support the full 64-bit PCI
+ * address space. Examples of such devices are Radeon GPUs
+ * that support only 36, 40 or 44 address lines. This means
+ * they can't enable the TVE selection bit to request IODA
+ * no-translate (bypass) operation.
+ *
+ * To allow such devices to work, we set up a TCE table that
+ * provides a 1:1 mapping of all physical memory where the
+ * physical page at address zero is mapped at 4 GB in PCI
+ * address space. If we fail to set up this TCE table we fall
+ * back on using no-translate operation, which means that
+ * devices that don't implement all 64 address lines may not
+ * function properly.
+ */
+ phb_setup_tce_table(sc);
+
memcpy(&sc->sc_bus_iot, sc->sc_iot, sizeof(sc->sc_bus_iot));
sc->sc_bus_iot.bus_private = sc;
sc->sc_bus_iot._space_map = phb_bs_iomap;
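
To make the comment above concrete, here is a standalone sketch of the table
layout it describes, assuming 1 GB TCE pages (tce_page_shift == 30) and 8 GB
of RAM; TCE_PAGE_SHIFT and PHYSMAX are illustrative stand-ins, not driver
names:

#include <stdint.h>
#include <stdio.h>

#define IODA_TCE_READ	(1ULL << 0)
#define IODA_TCE_WRITE	(1ULL << 1)
#define PHB_DMA_OFFSET	(1ULL << 32)

/* Illustrative stand-ins, not part of the driver. */
#define TCE_PAGE_SHIFT	30			/* 1 GB TCE pages */
#define PHYSMAX		(8ULL << 30)		/* pretend 8 GB of RAM */

int
main(void)
{
	uint64_t tce[16] = { 0 }, pa;
	int i, offset = (int)(PHB_DMA_OFFSET >> TCE_PAGE_SHIFT);  /* == 4 */

	/* Same fill loop as in phb_setup_tce_table() below. */
	for (pa = 0, i = 0; pa < PHYSMAX; pa += 1ULL << TCE_PAGE_SHIFT, i++)
		tce[i + offset] = pa | IODA_TCE_READ | IODA_TCE_WRITE;

	/* tce[0..3] stay zero: PCI addresses below 4 GB don't translate. */
	for (i = 0; i < 12; i++)
		printf("tce[%2d] = 0x%016llx\n", i, (unsigned long long)tce[i]);
	return 0;
}
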
@@ -377,6 +422,69 @@ phb_attach(struct device *parent, struct device *self, void *aux)
}
void
+phb_setup_tce_table(struct phb_softc *sc)
+{
+ uint64_t tce_table_size, tce_page_size;
+ uint64_t *tce;
+ uint32_t *tce_sizes;
+ int tce_page_shift;
+ int i, len, offset, nentries;
+ paddr_t pa;
+ int64_t error;
+
+ /* Determine the maximum supported TCE page size. */
+ len = OF_getproplen(sc->sc_node, "ibm,supported-tce-sizes");
+ if (len <= 0 || (len % sizeof(uint32_t)))
+ return;
+ tce_sizes = malloc(len, M_TEMP, M_WAITOK);
+ OF_getpropintarray(sc->sc_node, "ibm,supported-tce-sizes",
+ tce_sizes, len);
+ tce_page_shift = 0;
+ for (i = 0; i < len / sizeof(uint32_t); i++)
+ tce_page_shift = MAX(tce_page_shift, tce_sizes[i]);
+ free(tce_sizes, M_TEMP, len);
+
+ /* Bail out if we don't support 2G pages. */
+ if (tce_page_shift < 30)
+ return;
+
+ /* Calculate the appropriate size of the TCE table. */
+ tce_page_size = (1ULL << tce_page_shift);
+ tce_table_size = PAGE_SIZE;
+ nentries = howmany(PHB_DMA_OFFSET + physmax, tce_page_size);
+ while (tce_table_size < nentries * sizeof(*tce))
+ tce_table_size *= 2;
+ if (tce_table_size > IODA_TCE_TABLE_SIZE_MAX)
+ return;
+
+ /* Allocate the TCE table. */
+ sc->sc_tce_table = phb_dmamem_alloc(sc->sc_dmat,
+ tce_table_size, PAGE_SIZE);
+ if (sc->sc_tce_table == NULL) {
+ printf(": can't allocate DMA translation table\n");
+ return;
+ }
+
+ /* Fill TCE table. */
+ tce = PHB_DMA_KVA(sc->sc_tce_table);
+ offset = PHB_DMA_OFFSET >> tce_page_shift;
+ for (pa = 0, i = 0; pa < physmax; pa += tce_page_size, i++)
+ tce[i + offset] = pa | IODA_TCE_READ | IODA_TCE_WRITE;
+
+ /* Set up translations. */
+ error = opal_pci_map_pe_dma_window(sc->sc_phb_id,
+ sc->sc_pe_number, sc->sc_pe_number << 1, 1,
+ PHB_DMA_DVA(sc->sc_tce_table), PHB_DMA_LEN(sc->sc_tce_table),
+ tce_page_size);
+ if (error != OPAL_SUCCESS) {
+ printf("%s: can't enable DMA window\n", sc->sc_dev.dv_xname);
+ phb_dmamem_free(sc->sc_dmat, sc->sc_tce_table);
+ sc->sc_tce_table = NULL;
+ return;
+ }
+}
+
+void
phb_attach_hook(struct device *parent, struct device *self,
struct pcibus_attach_args *pba)
{
@@ -676,8 +784,13 @@ phb_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
return error;
/* For each segment. */
- for (seg = firstseg; seg <= *segp; seg++)
- map->dm_segs[seg].ds_addr |= IODA_TVE_SELECT;
+ for (seg = firstseg; seg <= *segp; seg++) {
+ map->dm_segs[seg].ds_addr = map->dm_segs[seg]._ds_paddr;
+ if (sc->sc_tce_table)
+ map->dm_segs[seg].ds_addr += PHB_DMA_OFFSET;
+ else
+ map->dm_segs[seg].ds_addr |= IODA_TVE_SELECT;
+ }
return 0;
}
@@ -695,8 +808,61 @@ phb_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map,
return error;
/* For each segment. */
- for (seg = 0; seg < nsegs; seg++)
- map->dm_segs[seg].ds_addr |= IODA_TVE_SELECT;
+ for (seg = 0; seg < nsegs; seg++) {
+ map->dm_segs[seg].ds_addr = map->dm_segs[seg]._ds_paddr;
+ if (sc->sc_tce_table)
+ map->dm_segs[seg].ds_addr += PHB_DMA_OFFSET;
+ else
+ map->dm_segs[seg].ds_addr |= IODA_TVE_SELECT;
+ }
return 0;
}
+
+struct phb_dmamem *
+phb_dmamem_alloc(bus_dma_tag_t dmat, bus_size_t size, bus_size_t align)
+{
+ struct phb_dmamem *pdm;
+ int nsegs;
+
+ pdm = malloc(sizeof(*pdm), M_DEVBUF, M_WAITOK | M_ZERO);
+ pdm->pdm_size = size;
+
+ if (bus_dmamap_create(dmat, size, 1, size, 0,
+ BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &pdm->pdm_map) != 0)
+ goto pdmfree;
+
+ if (bus_dmamem_alloc(dmat, size, align, 0, &pdm->pdm_seg, 1,
+ &nsegs, BUS_DMA_WAITOK | BUS_DMA_ZERO) != 0)
+ goto destroy;
+
+ if (bus_dmamem_map(dmat, &pdm->pdm_seg, nsegs, size,
+ &pdm->pdm_kva, BUS_DMA_WAITOK | BUS_DMA_NOCACHE) != 0)
+ goto free;
+
+ if (bus_dmamap_load_raw(dmat, pdm->pdm_map, &pdm->pdm_seg,
+ nsegs, size, BUS_DMA_WAITOK) != 0)
+ goto unmap;
+
+ return pdm;
+
+unmap:
+ bus_dmamem_unmap(dmat, pdm->pdm_kva, size);
+free:
+ bus_dmamem_free(dmat, &pdm->pdm_seg, 1);
+destroy:
+ bus_dmamap_destroy(dmat, pdm->pdm_map);
+pdmfree:
+ free(pdm, M_DEVBUF, sizeof(*pdm));
+
+ return NULL;
+}
+
+void
+phb_dmamem_free(bus_dma_tag_t dmat, struct phb_dmamem *pdm)
+{
+ bus_dmamem_unmap(dmat, pdm->pdm_kva, pdm->pdm_size);
+ bus_dmamem_free(dmat, &pdm->pdm_seg, 1);
+ bus_dmamap_destroy(dmat, pdm->pdm_map);
+ free(pdm, M_DEVBUF, sizeof(*pdm));
+}
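
Finally, a hedged usage sketch of the phb_dmamem_alloc()/phb_dmamem_free()
pair above, mirroring how phb_setup_tce_table() uses it; example_alloc() and
program_hardware() are illustrative only, and the sketch assumes the driver
context above (struct phb_softc, the PHB_DMA_* macros):

/*
 * program_hardware() is a hypothetical stand-in for a call such as
 * opal_pci_map_pe_dma_window().
 */
static int program_hardware(uint64_t);

static int
example_alloc(struct phb_softc *sc, bus_size_t size)
{
	struct phb_dmamem *pdm;
	uint64_t *table;

	/* Page-aligned, zeroed, uncached DMA memory. */
	pdm = phb_dmamem_alloc(sc->sc_dmat, size, PAGE_SIZE);
	if (pdm == NULL)
		return ENOMEM;

	table = PHB_DMA_KVA(pdm);	/* CPU view of the memory */
	table[0] = 0;			/* ... fill entries here ... */

	if (program_hardware(PHB_DMA_DVA(pdm)) != 0) {	/* bus address */
		phb_dmamem_free(sc->sc_dmat, pdm);
		return EIO;
	}
	return 0;
}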