author    Jonathan Gray <jsg@cvs.openbsd.org>  2018-01-08 05:41:34 +0000
committer Jonathan Gray <jsg@cvs.openbsd.org>  2018-01-08 05:41:34 +0000
commit    c00801de923e125863aaf8180439d59d610b2517
tree      e2896aa2785f3cf2151aeeb3c95fb5cc09a2fe02  /lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
parent    be30e6efb92db21299b936c0e068e7088941e9c9
Revert to Mesa 13.0.6 again.
Corruption has again been reported on Intel hardware running Xorg with the
modesetting driver (which uses OpenGL based acceleration instead of the SNA
acceleration the intel driver defaults to). Reported in various forms on
Sandy Bridge (X220), Ivy Bridge (X230) and Haswell (X240). Confirmed to not
occur with the intel driver, but the xserver was changed to default to the
modesetting driver on >= gen4 hardware (except Ironlake).

One means of triggering this is to open a large pdf with xpdf on an idle
machine and highlight a section of the document.

There have been reports of gpu hangs on gen4 intel hardware (T500 with GM45,
X61 with 965GM) when starting Xorg as well.
Diffstat (limited to 'lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c')
-rw-r--r--   lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c   676
1 file changed, 111 insertions(+), 565 deletions(-)
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 97bbe235a..e7ea51978 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -38,13 +38,6 @@
#include <stdio.h>
#include <inttypes.h>
-/* Set to 1 for verbose output showing committed sparse buffer ranges. */
-#define DEBUG_SPARSE_COMMITS 0
-
-struct amdgpu_sparse_backing_chunk {
- uint32_t begin, end;
-};
-
static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
uint64_t size,
@@ -90,7 +83,7 @@ static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
unsigned idle_fences;
bool buffer_idle;
- mtx_lock(&ws->bo_fence_lock);
+ pipe_mutex_lock(ws->bo_fence_lock);
for (idle_fences = 0; idle_fences < bo->num_fences; ++idle_fences) {
if (!amdgpu_fence_wait(bo->fences[idle_fences], 0, false))
@@ -106,13 +99,13 @@ static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
bo->num_fences -= idle_fences;
buffer_idle = !bo->num_fences;
- mtx_unlock(&ws->bo_fence_lock);
+ pipe_mutex_unlock(ws->bo_fence_lock);
return buffer_idle;
} else {
bool buffer_idle = true;
- mtx_lock(&ws->bo_fence_lock);
+ pipe_mutex_lock(ws->bo_fence_lock);
while (bo->num_fences && buffer_idle) {
struct pipe_fence_handle *fence = NULL;
bool fence_idle = false;
@@ -120,12 +113,12 @@ static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
amdgpu_fence_reference(&fence, bo->fences[0]);
/* Wait for the fence. */
- mtx_unlock(&ws->bo_fence_lock);
+ pipe_mutex_unlock(ws->bo_fence_lock);
if (amdgpu_fence_wait(fence, abs_timeout, true))
fence_idle = true;
else
buffer_idle = false;
- mtx_lock(&ws->bo_fence_lock);
+ pipe_mutex_lock(ws->bo_fence_lock);
/* Release an idle fence to avoid checking it again later, keeping in
* mind that the fence array may have been modified by other threads.
@@ -139,7 +132,7 @@ static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
amdgpu_fence_reference(&fence, NULL);
}
- mtx_unlock(&ws->bo_fence_lock);
+ pipe_mutex_unlock(ws->bo_fence_lock);
return buffer_idle;
}
@@ -167,10 +160,10 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
assert(bo->bo && "must not be called for slab entries");
- mtx_lock(&bo->ws->global_bo_list_lock);
+ pipe_mutex_lock(bo->ws->global_bo_list_lock);
LIST_DEL(&bo->u.real.global_list_item);
bo->ws->num_buffers--;
- mtx_unlock(&bo->ws->global_bo_list_lock);
+ pipe_mutex_unlock(bo->ws->global_bo_list_lock);
amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
amdgpu_va_range_free(bo->u.real.va_handle);
@@ -188,7 +181,6 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
bo->ws->mapped_vram -= bo->base.size;
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
bo->ws->mapped_gtt -= bo->base.size;
- bo->ws->num_mapped_buffers--;
}
FREE(bo);
@@ -217,8 +209,6 @@ static void *amdgpu_bo_map(struct pb_buffer *buf,
void *cpu = NULL;
uint64_t offset = 0;
- assert(!bo->sparse);
-
/* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
/* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
@@ -321,7 +311,6 @@ static void *amdgpu_bo_map(struct pb_buffer *buf,
real->ws->mapped_vram += real->base.size;
else if (real->initial_domain & RADEON_DOMAIN_GTT)
real->ws->mapped_gtt += real->base.size;
- real->ws->num_mapped_buffers++;
}
return (uint8_t*)cpu + offset;
}
@@ -331,8 +320,6 @@ static void amdgpu_bo_unmap(struct pb_buffer *buf)
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
struct amdgpu_winsys_bo *real;
- assert(!bo->sparse);
-
if (bo->user_ptr)
return;
@@ -343,7 +330,6 @@ static void amdgpu_bo_unmap(struct pb_buffer *buf)
real->ws->mapped_vram -= real->base.size;
else if (real->initial_domain & RADEON_DOMAIN_GTT)
real->ws->mapped_gtt -= real->base.size;
- real->ws->num_mapped_buffers--;
}
amdgpu_bo_cpu_unmap(real->bo);
@@ -360,10 +346,10 @@ static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
assert(bo->bo);
- mtx_lock(&ws->global_bo_list_lock);
+ pipe_mutex_lock(ws->global_bo_list_lock);
LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list);
ws->num_buffers++;
- mtx_unlock(&ws->global_bo_list_lock);
+ pipe_mutex_unlock(ws->global_bo_list_lock);
}
static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
@@ -398,6 +384,8 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
if (initial_domain & RADEON_DOMAIN_GTT)
request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
+ if (flags & RADEON_FLAG_CPU_ACCESS)
+ request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
if (flags & RADEON_FLAG_NO_CPU_ACCESS)
request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
if (flags & RADEON_FLAG_GTT_WC)
@@ -413,8 +401,6 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
}
va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
- if (size > ws->info.pte_fragment_size)
- alignment = MAX2(alignment, ws->info.pte_fragment_size);
r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
size + va_gap_size, alignment, 0, &va, &va_handle, 0);
if (r)
@@ -495,16 +481,33 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
{
struct amdgpu_winsys *ws = priv;
struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab);
- enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
- enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
+ enum radeon_bo_domain domains;
+ enum radeon_bo_flag flags = 0;
uint32_t base_id;
if (!slab)
return NULL;
- unsigned slab_size = 1 << AMDGPU_SLAB_BO_SIZE_LOG2;
+ if (heap & 1)
+ flags |= RADEON_FLAG_GTT_WC;
+ if (heap & 2)
+ flags |= RADEON_FLAG_CPU_ACCESS;
+
+ switch (heap >> 2) {
+ case 0:
+ domains = RADEON_DOMAIN_VRAM;
+ break;
+ default:
+ case 1:
+ domains = RADEON_DOMAIN_VRAM_GTT;
+ break;
+ case 2:
+ domains = RADEON_DOMAIN_GTT;
+ break;
+ }
+
slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(&ws->base,
- slab_size, slab_size,
+ 64 * 1024, 64 * 1024,
domains, flags));
if (!slab->buffer)
goto fail;
@@ -560,462 +563,6 @@ void amdgpu_bo_slab_free(void *priv, struct pb_slab *pslab)
FREE(slab);
}
-#if DEBUG_SPARSE_COMMITS
-static void
-sparse_dump(struct amdgpu_winsys_bo *bo, const char *func)
-{
- fprintf(stderr, "%s: %p (size=%"PRIu64", num_va_pages=%u) @ %s\n"
- "Commitments:\n",
- __func__, bo, bo->base.size, bo->u.sparse.num_va_pages, func);
-
- struct amdgpu_sparse_backing *span_backing = NULL;
- uint32_t span_first_backing_page = 0;
- uint32_t span_first_va_page = 0;
- uint32_t va_page = 0;
-
- for (;;) {
- struct amdgpu_sparse_backing *backing = 0;
- uint32_t backing_page = 0;
-
- if (va_page < bo->u.sparse.num_va_pages) {
- backing = bo->u.sparse.commitments[va_page].backing;
- backing_page = bo->u.sparse.commitments[va_page].page;
- }
-
- if (span_backing &&
- (backing != span_backing ||
- backing_page != span_first_backing_page + (va_page - span_first_va_page))) {
- fprintf(stderr, " %u..%u: backing=%p:%u..%u\n",
- span_first_va_page, va_page - 1, span_backing,
- span_first_backing_page,
- span_first_backing_page + (va_page - span_first_va_page) - 1);
-
- span_backing = NULL;
- }
-
- if (va_page >= bo->u.sparse.num_va_pages)
- break;
-
- if (backing && !span_backing) {
- span_backing = backing;
- span_first_backing_page = backing_page;
- span_first_va_page = va_page;
- }
-
- va_page++;
- }
-
- fprintf(stderr, "Backing:\n");
-
- list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) {
- fprintf(stderr, " %p (size=%"PRIu64")\n", backing, backing->bo->base.size);
- for (unsigned i = 0; i < backing->num_chunks; ++i)
- fprintf(stderr, " %u..%u\n", backing->chunks[i].begin, backing->chunks[i].end);
- }
-}
-#endif
-
-/*
- * Attempt to allocate the given number of backing pages. Fewer pages may be
- * allocated (depending on the fragmentation of existing backing buffers),
- * which will be reflected by a change to *pnum_pages.
- */
-static struct amdgpu_sparse_backing *
-sparse_backing_alloc(struct amdgpu_winsys_bo *bo, uint32_t *pstart_page, uint32_t *pnum_pages)
-{
- struct amdgpu_sparse_backing *best_backing;
- unsigned best_idx;
- uint32_t best_num_pages;
-
- best_backing = NULL;
- best_idx = 0;
- best_num_pages = 0;
-
- /* This is a very simple and inefficient best-fit algorithm. */
- list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) {
- for (unsigned idx = 0; idx < backing->num_chunks; ++idx) {
- uint32_t cur_num_pages = backing->chunks[idx].end - backing->chunks[idx].begin;
- if ((best_num_pages < *pnum_pages && cur_num_pages > best_num_pages) ||
- (best_num_pages > *pnum_pages && cur_num_pages < best_num_pages)) {
- best_backing = backing;
- best_idx = idx;
- best_num_pages = cur_num_pages;
- }
- }
- }
-
- /* Allocate a new backing buffer if necessary. */
- if (!best_backing) {
- struct pb_buffer *buf;
- uint64_t size;
- uint32_t pages;
-
- best_backing = CALLOC_STRUCT(amdgpu_sparse_backing);
- if (!best_backing)
- return NULL;
-
- best_backing->max_chunks = 4;
- best_backing->chunks = CALLOC(best_backing->max_chunks,
- sizeof(*best_backing->chunks));
- if (!best_backing->chunks) {
- FREE(best_backing);
- return NULL;
- }
-
- assert(bo->u.sparse.num_backing_pages < DIV_ROUND_UP(bo->base.size, RADEON_SPARSE_PAGE_SIZE));
-
- size = MIN3(bo->base.size / 16,
- 8 * 1024 * 1024,
- bo->base.size - (uint64_t)bo->u.sparse.num_backing_pages * RADEON_SPARSE_PAGE_SIZE);
- size = MAX2(size, RADEON_SPARSE_PAGE_SIZE);
-
- buf = amdgpu_bo_create(&bo->ws->base, size, RADEON_SPARSE_PAGE_SIZE,
- bo->initial_domain,
- bo->u.sparse.flags | RADEON_FLAG_NO_SUBALLOC);
- if (!buf) {
- FREE(best_backing->chunks);
- FREE(best_backing);
- return NULL;
- }
-
- /* We might have gotten a bigger buffer than requested via caching. */
- pages = buf->size / RADEON_SPARSE_PAGE_SIZE;
-
- best_backing->bo = amdgpu_winsys_bo(buf);
- best_backing->num_chunks = 1;
- best_backing->chunks[0].begin = 0;
- best_backing->chunks[0].end = pages;
-
- list_add(&best_backing->list, &bo->u.sparse.backing);
- bo->u.sparse.num_backing_pages += pages;
-
- best_idx = 0;
- best_num_pages = pages;
- }
-
- *pnum_pages = MIN2(*pnum_pages, best_num_pages);
- *pstart_page = best_backing->chunks[best_idx].begin;
- best_backing->chunks[best_idx].begin += *pnum_pages;
-
- if (best_backing->chunks[best_idx].begin >= best_backing->chunks[best_idx].end) {
- memmove(&best_backing->chunks[best_idx], &best_backing->chunks[best_idx + 1],
- sizeof(*best_backing->chunks) * (best_backing->num_chunks - best_idx - 1));
- best_backing->num_chunks--;
- }
-
- return best_backing;
-}
-
-static void
-sparse_free_backing_buffer(struct amdgpu_winsys_bo *bo,
- struct amdgpu_sparse_backing *backing)
-{
- struct amdgpu_winsys *ws = backing->bo->ws;
-
- bo->u.sparse.num_backing_pages -= backing->bo->base.size / RADEON_SPARSE_PAGE_SIZE;
-
- mtx_lock(&ws->bo_fence_lock);
- amdgpu_add_fences(backing->bo, bo->num_fences, bo->fences);
- mtx_unlock(&ws->bo_fence_lock);
-
- list_del(&backing->list);
- amdgpu_winsys_bo_reference(&backing->bo, NULL);
- FREE(backing->chunks);
- FREE(backing);
-}
-
-/*
- * Return a range of pages from the given backing buffer back into the
- * free structure.
- */
-static bool
-sparse_backing_free(struct amdgpu_winsys_bo *bo,
- struct amdgpu_sparse_backing *backing,
- uint32_t start_page, uint32_t num_pages)
-{
- uint32_t end_page = start_page + num_pages;
- unsigned low = 0;
- unsigned high = backing->num_chunks;
-
- /* Find the first chunk with begin >= start_page. */
- while (low < high) {
- unsigned mid = low + (high - low) / 2;
-
- if (backing->chunks[mid].begin >= start_page)
- high = mid;
- else
- low = mid + 1;
- }
-
- assert(low >= backing->num_chunks || end_page <= backing->chunks[low].begin);
- assert(low == 0 || backing->chunks[low - 1].end <= start_page);
-
- if (low > 0 && backing->chunks[low - 1].end == start_page) {
- backing->chunks[low - 1].end = end_page;
-
- if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
- backing->chunks[low - 1].end = backing->chunks[low].end;
- memmove(&backing->chunks[low], &backing->chunks[low + 1],
- sizeof(*backing->chunks) * (backing->num_chunks - low - 1));
- backing->num_chunks--;
- }
- } else if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
- backing->chunks[low].begin = start_page;
- } else {
- if (backing->num_chunks >= backing->max_chunks) {
- unsigned new_max_chunks = 2 * backing->max_chunks;
- struct amdgpu_sparse_backing_chunk *new_chunks =
- REALLOC(backing->chunks,
- sizeof(*backing->chunks) * backing->max_chunks,
- sizeof(*backing->chunks) * new_max_chunks);
- if (!new_chunks)
- return false;
-
- backing->max_chunks = new_max_chunks;
- backing->chunks = new_chunks;
- }
-
- memmove(&backing->chunks[low + 1], &backing->chunks[low],
- sizeof(*backing->chunks) * (backing->num_chunks - low));
- backing->chunks[low].begin = start_page;
- backing->chunks[low].end = end_page;
- backing->num_chunks++;
- }
-
- if (backing->num_chunks == 1 && backing->chunks[0].begin == 0 &&
- backing->chunks[0].end == backing->bo->base.size / RADEON_SPARSE_PAGE_SIZE)
- sparse_free_backing_buffer(bo, backing);
-
- return true;
-}
-
-static void amdgpu_bo_sparse_destroy(struct pb_buffer *_buf)
-{
- struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
- int r;
-
- assert(!bo->bo && bo->sparse);
-
- r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0,
- (uint64_t)bo->u.sparse.num_va_pages * RADEON_SPARSE_PAGE_SIZE,
- bo->va, 0, AMDGPU_VA_OP_CLEAR);
- if (r) {
- fprintf(stderr, "amdgpu: clearing PRT VA region on destroy failed (%d)\n", r);
- }
-
- while (!list_empty(&bo->u.sparse.backing)) {
- struct amdgpu_sparse_backing *dummy = NULL;
- sparse_free_backing_buffer(bo,
- container_of(bo->u.sparse.backing.next,
- dummy, list));
- }
-
- amdgpu_va_range_free(bo->u.sparse.va_handle);
- mtx_destroy(&bo->u.sparse.commit_lock);
- FREE(bo->u.sparse.commitments);
- FREE(bo);
-}
-
-static const struct pb_vtbl amdgpu_winsys_bo_sparse_vtbl = {
- amdgpu_bo_sparse_destroy
- /* other functions are never called */
-};
-
-static struct pb_buffer *
-amdgpu_bo_sparse_create(struct amdgpu_winsys *ws, uint64_t size,
- enum radeon_bo_domain domain,
- enum radeon_bo_flag flags)
-{
- struct amdgpu_winsys_bo *bo;
- uint64_t map_size;
- uint64_t va_gap_size;
- int r;
-
- /* We use 32-bit page numbers; refuse to attempt allocating sparse buffers
- * that exceed this limit. This is not really a restriction: we don't have
- * that much virtual address space anyway.
- */
- if (size > (uint64_t)INT32_MAX * RADEON_SPARSE_PAGE_SIZE)
- return NULL;
-
- bo = CALLOC_STRUCT(amdgpu_winsys_bo);
- if (!bo)
- return NULL;
-
- pipe_reference_init(&bo->base.reference, 1);
- bo->base.alignment = RADEON_SPARSE_PAGE_SIZE;
- bo->base.size = size;
- bo->base.vtbl = &amdgpu_winsys_bo_sparse_vtbl;
- bo->ws = ws;
- bo->initial_domain = domain;
- bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
- bo->sparse = true;
- bo->u.sparse.flags = flags & ~RADEON_FLAG_SPARSE;
-
- bo->u.sparse.num_va_pages = DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE);
- bo->u.sparse.commitments = CALLOC(bo->u.sparse.num_va_pages,
- sizeof(*bo->u.sparse.commitments));
- if (!bo->u.sparse.commitments)
- goto error_alloc_commitments;
-
- mtx_init(&bo->u.sparse.commit_lock, mtx_plain);
- LIST_INITHEAD(&bo->u.sparse.backing);
-
- /* For simplicity, we always map a multiple of the page size. */
- map_size = align64(size, RADEON_SPARSE_PAGE_SIZE);
- va_gap_size = ws->check_vm ? 4 * RADEON_SPARSE_PAGE_SIZE : 0;
- r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
- map_size + va_gap_size, RADEON_SPARSE_PAGE_SIZE,
- 0, &bo->va, &bo->u.sparse.va_handle, 0);
- if (r)
- goto error_va_alloc;
-
- r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0, size, bo->va,
- AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
- if (r)
- goto error_va_map;
-
- return &bo->base;
-
-error_va_map:
- amdgpu_va_range_free(bo->u.sparse.va_handle);
-error_va_alloc:
- mtx_destroy(&bo->u.sparse.commit_lock);
- FREE(bo->u.sparse.commitments);
-error_alloc_commitments:
- FREE(bo);
- return NULL;
-}
-
-static bool
-amdgpu_bo_sparse_commit(struct pb_buffer *buf, uint64_t offset, uint64_t size,
- bool commit)
-{
- struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buf);
- struct amdgpu_sparse_commitment *comm;
- uint32_t va_page, end_va_page;
- bool ok = true;
- int r;
-
- assert(bo->sparse);
- assert(offset % RADEON_SPARSE_PAGE_SIZE == 0);
- assert(offset <= bo->base.size);
- assert(size <= bo->base.size - offset);
- assert(size % RADEON_SPARSE_PAGE_SIZE == 0 || offset + size == bo->base.size);
-
- comm = bo->u.sparse.commitments;
- va_page = offset / RADEON_SPARSE_PAGE_SIZE;
- end_va_page = va_page + DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE);
-
- mtx_lock(&bo->u.sparse.commit_lock);
-
-#if DEBUG_SPARSE_COMMITS
- sparse_dump(bo, __func__);
-#endif
-
- if (commit) {
- while (va_page < end_va_page) {
- uint32_t span_va_page;
-
- /* Skip pages that are already committed. */
- if (comm[va_page].backing) {
- va_page++;
- continue;
- }
-
- /* Determine length of uncommitted span. */
- span_va_page = va_page;
- while (va_page < end_va_page && !comm[va_page].backing)
- va_page++;
-
- /* Fill the uncommitted span with chunks of backing memory. */
- while (span_va_page < va_page) {
- struct amdgpu_sparse_backing *backing;
- uint32_t backing_start, backing_size;
-
- backing_size = va_page - span_va_page;
- backing = sparse_backing_alloc(bo, &backing_start, &backing_size);
- if (!backing) {
- ok = false;
- goto out;
- }
-
- r = amdgpu_bo_va_op_raw(bo->ws->dev, backing->bo->bo,
- (uint64_t)backing_start * RADEON_SPARSE_PAGE_SIZE,
- (uint64_t)backing_size * RADEON_SPARSE_PAGE_SIZE,
- bo->va + (uint64_t)span_va_page * RADEON_SPARSE_PAGE_SIZE,
- AMDGPU_VM_PAGE_READABLE |
- AMDGPU_VM_PAGE_WRITEABLE |
- AMDGPU_VM_PAGE_EXECUTABLE,
- AMDGPU_VA_OP_REPLACE);
- if (r) {
- ok = sparse_backing_free(bo, backing, backing_start, backing_size);
- assert(ok && "sufficient memory should already be allocated");
-
- ok = false;
- goto out;
- }
-
- while (backing_size) {
- comm[span_va_page].backing = backing;
- comm[span_va_page].page = backing_start;
- span_va_page++;
- backing_start++;
- backing_size--;
- }
- }
- }
- } else {
- r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0,
- (uint64_t)(end_va_page - va_page) * RADEON_SPARSE_PAGE_SIZE,
- bo->va + (uint64_t)va_page * RADEON_SPARSE_PAGE_SIZE,
- AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_REPLACE);
- if (r) {
- ok = false;
- goto out;
- }
-
- while (va_page < end_va_page) {
- struct amdgpu_sparse_backing *backing;
- uint32_t backing_start;
- uint32_t span_pages;
-
- /* Skip pages that are already uncommitted. */
- if (!comm[va_page].backing) {
- va_page++;
- continue;
- }
-
- /* Group contiguous spans of pages. */
- backing = comm[va_page].backing;
- backing_start = comm[va_page].page;
- comm[va_page].backing = NULL;
-
- span_pages = 1;
- va_page++;
-
- while (va_page < end_va_page &&
- comm[va_page].backing == backing &&
- comm[va_page].page == backing_start + span_pages) {
- comm[va_page].backing = NULL;
- va_page++;
- span_pages++;
- }
-
- if (!sparse_backing_free(bo, backing, backing_start, span_pages)) {
- /* Couldn't allocate tracking data structures, so we have to leak */
- fprintf(stderr, "amdgpu: leaking PRT backing memory\n");
- ok = false;
- }
- }
- }
-out:
-
- mtx_unlock(&bo->u.sparse.commit_lock);
-
- return ok;
-}
-
static unsigned eg_tile_split(unsigned tile_split)
{
switch (tile_split) {
@@ -1050,7 +597,7 @@ static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
{
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
struct amdgpu_bo_info info = {0};
- uint64_t tiling_flags;
+ uint32_t tiling_flags;
int r;
assert(bo->bo && "must not be called for slab entries");
@@ -1061,25 +608,21 @@ static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
tiling_flags = info.metadata.tiling_info;
- if (bo->ws->info.chip_class >= GFX9) {
- md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
- } else {
- md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
- md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
-
- if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
- md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
- else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
- md->u.legacy.microtile = RADEON_LAYOUT_TILED;
-
- md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
- md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
- md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
- md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
- md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
- md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
- md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
- }
+ md->microtile = RADEON_LAYOUT_LINEAR;
+ md->macrotile = RADEON_LAYOUT_LINEAR;
+
+ if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
+ md->macrotile = RADEON_LAYOUT_TILED;
+ else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
+ md->microtile = RADEON_LAYOUT_TILED;
+
+ md->pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
+ md->bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
+ md->bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
+ md->tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
+ md->mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
+ md->num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
+ md->scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
md->size_metadata = info.metadata.size_metadata;
memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
@@ -1090,33 +633,29 @@ static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
{
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
struct amdgpu_bo_metadata metadata = {0};
- uint64_t tiling_flags = 0;
+ uint32_t tiling_flags = 0;
assert(bo->bo && "must not be called for slab entries");
- if (bo->ws->info.chip_class >= GFX9) {
- tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
- } else {
- if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
- tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
- else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
- tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
- else
- tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
-
- tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
- tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
- tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
- if (md->u.legacy.tile_split)
- tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(md->u.legacy.tile_split));
- tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
- tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks)-1);
-
- if (md->u.legacy.scanout)
- tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
- else
- tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
- }
+ if (md->macrotile == RADEON_LAYOUT_TILED)
+ tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
+ else if (md->microtile == RADEON_LAYOUT_TILED)
+ tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
+ else
+ tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
+
+ tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->pipe_config);
+ tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->bankw));
+ tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->bankh));
+ if (md->tile_split)
+ tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(md->tile_split));
+ tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->mtilea));
+ tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->num_banks)-1);
+
+ if (md->scanout)
+ tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
+ else
+ tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
metadata.tiling_info = tiling_flags;
metadata.size_metadata = md->size_metadata;
@@ -1136,21 +675,33 @@ amdgpu_bo_create(struct radeon_winsys *rws,
struct amdgpu_winsys_bo *bo;
unsigned usage = 0, pb_cache_bucket;
- /* VRAM implies WC. This is not optional. */
- assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
-
- /* NO_CPU_ACCESS is valid with VRAM only. */
- assert(domain == RADEON_DOMAIN_VRAM || !(flags & RADEON_FLAG_NO_CPU_ACCESS));
-
/* Sub-allocate small buffers from slabs. */
- if (!(flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE)) &&
+ if (!(flags & RADEON_FLAG_HANDLE) &&
size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) &&
alignment <= MAX2(1 << AMDGPU_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
struct pb_slab_entry *entry;
- int heap = radeon_get_heap_index(domain, flags);
+ unsigned heap = 0;
- if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS)
+ if (flags & RADEON_FLAG_GTT_WC)
+ heap |= 1;
+ if (flags & RADEON_FLAG_CPU_ACCESS)
+ heap |= 2;
+ if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
+ goto no_slab;
+
+ switch (domain) {
+ case RADEON_DOMAIN_VRAM:
+ heap |= 0 * 4;
+ break;
+ case RADEON_DOMAIN_VRAM_GTT:
+ heap |= 1 * 4;
+ break;
+ case RADEON_DOMAIN_GTT:
+ heap |= 2 * 4;
+ break;
+ default:
goto no_slab;
+ }
entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
if (!entry) {
@@ -1171,16 +722,8 @@ amdgpu_bo_create(struct radeon_winsys *rws,
}
no_slab:
- if (flags & RADEON_FLAG_SPARSE) {
- assert(RADEON_SPARSE_PAGE_SIZE % alignment == 0);
-
- flags |= RADEON_FLAG_NO_CPU_ACCESS;
-
- return amdgpu_bo_sparse_create(ws, size, domain, flags);
- }
-
/* This flag is irrelevant for the cache. */
- flags &= ~RADEON_FLAG_NO_SUBALLOC;
+ flags &= ~RADEON_FLAG_HANDLE;
/* Align size to page size. This is the minimum alignment for normal
* BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
@@ -1189,11 +732,22 @@ no_slab:
size = align64(size, ws->info.gart_page_size);
alignment = align(alignment, ws->info.gart_page_size);
- int heap = radeon_get_heap_index(domain, flags);
- assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
- usage = 1 << heap; /* Only set one usage bit for each heap. */
-
- pb_cache_bucket = radeon_get_pb_cache_bucket_index(heap);
+ /* Only set one usage bit each for domains and flags, or the cache manager
+ * might consider different sets of domains / flags compatible
+ */
+ if (domain == RADEON_DOMAIN_VRAM_GTT)
+ usage = 1 << 2;
+ else
+ usage = domain >> 1;
+ assert(flags < sizeof(usage) * 8 - 3);
+ usage |= 1 << (flags + 3);
+
+ /* Determine the pb_cache bucket for minimizing pb_cache misses. */
+ pb_cache_bucket = 0;
+ if (domain & RADEON_DOMAIN_VRAM) /* VRAM or VRAM+GTT */
+ pb_cache_bucket += 1;
+ if (flags == RADEON_FLAG_GTT_WC) /* WC */
+ pb_cache_bucket += 2;
assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
/* Get a buffer from the cache. */
@@ -1322,9 +876,10 @@ static bool amdgpu_bo_get_handle(struct pb_buffer *buffer,
enum amdgpu_bo_handle_type type;
int r;
- /* Don't allow exports of slab entries and sparse buffers. */
- if (!bo->bo)
- return false;
+ if (!bo->bo) {
+ offset += bo->va - bo->u.slab.real->va;
+ bo = bo->u.slab.real;
+ }
bo->u.real.use_reusable_pool = false;
@@ -1411,13 +966,6 @@ static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL;
}
-static bool amdgpu_bo_is_suballocated(struct pb_buffer *buf)
-{
- struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
-
- return !bo->bo && !bo->sparse;
-}
-
static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf)
{
return ((struct amdgpu_winsys_bo*)buf)->va;
@@ -1434,9 +982,7 @@ void amdgpu_bo_init_functions(struct amdgpu_winsys *ws)
ws->base.buffer_from_handle = amdgpu_bo_from_handle;
ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr;
- ws->base.buffer_is_suballocated = amdgpu_bo_is_suballocated;
ws->base.buffer_get_handle = amdgpu_bo_get_handle;
- ws->base.buffer_commit = amdgpu_bo_sparse_commit;
ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
}