author     Jonathan Gray <jsg@cvs.openbsd.org>  2023-01-28 08:56:54 +0000
committer  Jonathan Gray <jsg@cvs.openbsd.org>  2023-01-28 08:56:54 +0000
commit     d305570c9b1fd87c4acdec589761cfa39fd04a3b (patch)
tree       e340315dd9d6966ccc3a48aa7a845e2213e40e62 /lib/mesa/src/gallium/winsys/amdgpu
parent     1c5c7896c1d54abd25c0f33ca996165b359eecb3 (diff)
Merge Mesa 22.3.4
Diffstat (limited to 'lib/mesa/src/gallium/winsys/amdgpu')
-rw-r--r--  lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c      | 110
-rw-r--r--  lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h      |   8
-rw-r--r--  lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c      | 415
-rw-r--r--  lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h      |   6
-rw-r--r--  lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_public.h  |  41
-rw-r--r--  lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c  |  75
-rw-r--r--  lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h  |   4
7 files changed, 322 insertions, 337 deletions
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index bcba77827..1b53bcc61 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -239,11 +239,8 @@ static void amdgpu_bo_destroy_or_cache(struct radeon_winsys *rws, struct pb_buff
static void amdgpu_clean_up_buffer_managers(struct amdgpu_winsys *ws)
{
- for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
+ for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++)
pb_slabs_reclaim(&ws->bo_slabs[i]);
- if (ws->info.has_tmz_support)
- pb_slabs_reclaim(&ws->bo_slabs_encrypted[i]);
- }
pb_cache_release_all_buffers(&ws->bo_cache);
}
@@ -530,9 +527,15 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
if (flags & RADEON_FLAG_GTT_WC)
request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+
+ if (flags & RADEON_FLAG_DISCARDABLE &&
+ ws->info.drm_minor >= 47)
+ request.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;
+
if (ws->zero_all_vram_allocs &&
(request.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM))
request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
+
if ((flags & RADEON_FLAG_ENCRYPTED) &&
ws->info.has_tmz_support) {
request.flags |= AMDGPU_GEM_CREATE_ENCRYPTED;
@@ -574,9 +577,13 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
if (!(flags & RADEON_FLAG_READ_ONLY))
vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;
- if (flags & RADEON_FLAG_UNCACHED)
+ if (flags & RADEON_FLAG_GL2_BYPASS)
vm_flags |= AMDGPU_VM_MTYPE_UC;
+ if (flags & RADEON_FLAG_MALL_NOALLOC &&
+ ws->info.drm_minor >= 47)
+ vm_flags |= AMDGPU_VM_PAGE_NOALLOC;
+
r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,
AMDGPU_VA_OP_MAP);
if (r)
@@ -629,14 +636,11 @@ bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
return amdgpu_bo_can_reclaim(priv, &bo->base);
}
-static struct pb_slabs *get_slabs(struct amdgpu_winsys *ws, uint64_t size,
- enum radeon_bo_flag flags)
+static struct pb_slabs *get_slabs(struct amdgpu_winsys *ws, uint64_t size)
{
- struct pb_slabs *bo_slabs = ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) ?
- ws->bo_slabs_encrypted : ws->bo_slabs;
/* Find the correct slab allocator for the given size. */
for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
- struct pb_slabs *slabs = &bo_slabs[i];
+ struct pb_slabs *slabs = &ws->bo_slabs[i];
if (size <= 1 << (slabs->min_order + slabs->num_orders - 1))
return slabs;
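
With the encrypted slab pool gone, get_slabs() reduces to a pure size lookup: the first allocator whose largest entry, 1 << (min_order + num_orders - 1), can hold the request wins. A minimal standalone sketch of that selection (not part of the diff; the min_order/num_orders values are hypothetical):

   /* Illustrative sketch, not part of the diff: pick the smallest slab
    * allocator whose largest entry fits `size`, mirroring get_slabs().
    * The min_order/num_orders values are hypothetical. */
   #include <stdint.h>
   #include <stdio.h>

   struct slab_orders { unsigned min_order, num_orders; };

   static int pick_slab_index(const struct slab_orders *s, unsigned count, uint64_t size)
   {
      for (unsigned i = 0; i < count; i++) {
         uint64_t max_entry = 1ull << (s[i].min_order + s[i].num_orders - 1);
         if (size <= max_entry)
            return (int)i;  /* first allocator that can hold the entry */
      }
      return -1;            /* too large for any slab: use a real BO instead */
   }

   int main(void)
   {
      const struct slab_orders s[] = { {8, 5}, {13, 5}, {18, 5} }; /* 256 B .. 4 MiB entries */
      printf("%d\n", pick_slab_index(s, 3, 60000)); /* prints 1: fits under 128 KiB */
      return 0;
   }
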
@@ -663,7 +667,7 @@ static void amdgpu_bo_slab_destroy(struct radeon_winsys *rws, struct pb_buffer *
assert(!bo->bo);
- slabs = get_slabs(ws, bo->base.size, bo->base.usage & RADEON_FLAG_ENCRYPTED);
+ slabs = get_slabs(ws, bo->base.size);
if (bo->base.placement & RADEON_DOMAIN_VRAM)
ws->slab_wasted_vram -= get_slab_wasted_size(ws, bo);
@@ -699,10 +703,8 @@ static unsigned get_slab_entry_alignment(struct amdgpu_winsys *ws, unsigned size
return entry_size;
}
-static struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
- unsigned entry_size,
- unsigned group_index,
- bool encrypted)
+struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size,
+ unsigned group_index)
{
struct amdgpu_winsys *ws = priv;
struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab);
@@ -714,15 +716,9 @@ static struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
if (!slab)
return NULL;
- if (encrypted)
- flags |= RADEON_FLAG_ENCRYPTED;
-
- struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) ?
- ws->bo_slabs_encrypted : ws->bo_slabs;
-
/* Determine the slab buffer size. */
for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
- unsigned max_entry_size = 1 << (slabs[i].min_order + slabs[i].num_orders - 1);
+ unsigned max_entry_size = 1 << (ws->bo_slabs[i].min_order + ws->bo_slabs[i].num_orders - 1);
if (entry_size <= max_entry_size) {
/* The slab size is twice the size of the largest possible entry. */
@@ -815,20 +811,6 @@ fail:
return NULL;
}
-struct pb_slab *amdgpu_bo_slab_alloc_encrypted(void *priv, unsigned heap,
- unsigned entry_size,
- unsigned group_index)
-{
- return amdgpu_bo_slab_alloc(priv, heap, entry_size, group_index, true);
-}
-
-struct pb_slab *amdgpu_bo_slab_alloc_normal(void *priv, unsigned heap,
- unsigned entry_size,
- unsigned group_index)
-{
- return amdgpu_bo_slab_alloc(priv, heap, entry_size, group_index, false);
-}
-
void amdgpu_bo_slab_free(struct amdgpu_winsys *ws, struct pb_slab *pslab)
{
struct amdgpu_slab *slab = amdgpu_slab(pslab);
@@ -1163,7 +1145,7 @@ amdgpu_bo_sparse_create(struct amdgpu_winsys *ws, uint64_t size,
if (r)
goto error_va_alloc;
- r = amdgpu_bo_va_op_raw(ws->dev, NULL, 0, size, bo->va,
+ r = amdgpu_bo_va_op_raw(ws->dev, NULL, 0, map_size, bo->va,
AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
if (r)
goto error_va_map;
@@ -1360,34 +1342,23 @@ amdgpu_bo_create(struct amdgpu_winsys *ws,
enum radeon_bo_flag flags)
{
struct amdgpu_winsys_bo *bo;
- int heap = -1;
- if (domain & (RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA))
- flags |= RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_SUBALLOC;
+ radeon_canonicalize_bo_flags(&domain, &flags);
- /* VRAM implies WC. This is not optional. */
- assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
-
- /* NO_CPU_ACCESS is not valid with GTT. */
- assert(!(domain & RADEON_DOMAIN_GTT) || !(flags & RADEON_FLAG_NO_CPU_ACCESS));
+ /* Handle sparse buffers first. */
+ if (flags & RADEON_FLAG_SPARSE) {
+ assert(RADEON_SPARSE_PAGE_SIZE % alignment == 0);
- /* Sparse buffers must have NO_CPU_ACCESS set. */
- assert(!(flags & RADEON_FLAG_SPARSE) || flags & RADEON_FLAG_NO_CPU_ACCESS);
+ return amdgpu_bo_sparse_create(ws, size, domain, flags);
+ }
- struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) ?
- ws->bo_slabs_encrypted : ws->bo_slabs;
- struct pb_slabs *last_slab = &slabs[NUM_SLAB_ALLOCATORS - 1];
+ struct pb_slabs *last_slab = &ws->bo_slabs[NUM_SLAB_ALLOCATORS - 1];
unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1);
+ int heap = radeon_get_heap_index(domain, flags);
/* Sub-allocate small buffers from slabs. */
- if (!(flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE)) &&
- size <= max_slab_entry_size) {
+ if (heap >= 0 && size <= max_slab_entry_size) {
struct pb_slab_entry *entry;
- int heap = radeon_get_heap_index(domain, flags);
-
- if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS)
- goto no_slab;
-
unsigned alloc_size = size;
/* Always use slabs for sizes less than 4 KB because the kernel aligns
@@ -1410,7 +1381,7 @@ amdgpu_bo_create(struct amdgpu_winsys *ws,
}
}
- struct pb_slabs *slabs = get_slabs(ws, alloc_size, flags);
+ struct pb_slabs *slabs = get_slabs(ws, alloc_size);
entry = pb_slab_alloc(slabs, alloc_size, heap);
if (!entry) {
/* Clean up buffer managers and try again. */
@@ -1435,15 +1406,6 @@ amdgpu_bo_create(struct amdgpu_winsys *ws,
}
no_slab:
- if (flags & RADEON_FLAG_SPARSE) {
- assert(RADEON_SPARSE_PAGE_SIZE % alignment == 0);
-
- return amdgpu_bo_sparse_create(ws, size, domain, flags);
- }
-
- /* This flag is irrelevant for the cache. */
- flags &= ~RADEON_FLAG_NO_SUBALLOC;
-
/* Align size to page size. This is the minimum alignment for normal
* BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
* like constant/uniform buffers, can benefit from better and more reuse.
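
For reference, the rounding described in the comment above is plain power-of-two alignment; a small sketch (not part of the diff, with 4096 standing in for ws->info.gart_page_size):

   /* Illustrative sketch, not part of the diff: the page-size rounding the
    * comment above describes; 4096 stands in for ws->info.gart_page_size. */
   #include <stdint.h>
   #include <stdio.h>

   static uint64_t align_pow2(uint64_t value, uint64_t alignment)
   {
      return (value + alignment - 1) & ~(alignment - 1); /* alignment must be a power of two */
   }

   int main(void)
   {
      printf("%llu\n", (unsigned long long)align_pow2(1000, 4096)); /* small uniform buffer -> 4096 */
      printf("%llu\n", (unsigned long long)align_pow2(8192, 4096)); /* already aligned -> 8192 */
      return 0;
   }
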
@@ -1453,11 +1415,13 @@ no_slab:
alignment = align(alignment, ws->info.gart_page_size);
}
- bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
+ bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+ !(flags & RADEON_FLAG_DISCARDABLE);
if (use_reusable_pool) {
- heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_ENCRYPTED);
- assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
+ /* RADEON_FLAG_NO_SUBALLOC is irrelevant for the cache. */
+ heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_NO_SUBALLOC);
+ assert(heap >= 0 && heap < RADEON_NUM_HEAPS);
/* Get a buffer from the cache. */
bo = (struct amdgpu_winsys_bo*)
@@ -1586,7 +1550,8 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
/* Initialize the structure. */
simple_mtx_init(&bo->lock, mtx_plain);
pipe_reference_init(&bo->base.reference, 1);
- bo->base.alignment_log2 = util_logbase2(info.phys_alignment);
+ bo->base.alignment_log2 = util_logbase2(info.phys_alignment ?
+ info.phys_alignment : ws->info.gart_page_size);
bo->bo = result.buf_handle;
bo->base.size = result.alloc_size;
bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
@@ -1697,7 +1662,8 @@ static bool amdgpu_bo_get_handle(struct radeon_winsys *rws,
}
static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
- void *pointer, uint64_t size)
+ void *pointer, uint64_t size,
+ enum radeon_bo_flag flags)
{
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
amdgpu_bo_handle buf_handle;
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 48bce54ec..2bd15af2a 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -132,12 +132,8 @@ void amdgpu_bo_unmap(struct radeon_winsys *rws, struct pb_buffer *buf);
void amdgpu_bo_init_functions(struct amdgpu_screen_winsys *ws);
bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry);
-struct pb_slab *amdgpu_bo_slab_alloc_encrypted(void *priv, unsigned heap,
- unsigned entry_size,
- unsigned group_index);
-struct pb_slab *amdgpu_bo_slab_alloc_normal(void *priv, unsigned heap,
- unsigned entry_size,
- unsigned group_index);
+struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size,
+ unsigned group_index);
void amdgpu_bo_slab_free(struct amdgpu_winsys *ws, struct pb_slab *slab);
static inline
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index fc2340a06..40b8c5850 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -273,11 +273,30 @@ amdgpu_cs_get_next_fence(struct radeon_cmdbuf *rcs)
/* CONTEXTS */
-static struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws)
+static uint32_t
+radeon_to_amdgpu_priority(enum radeon_ctx_priority radeon_priority)
+{
+ switch (radeon_priority) {
+ case RADEON_CTX_PRIORITY_REALTIME:
+ return AMDGPU_CTX_PRIORITY_VERY_HIGH;
+ case RADEON_CTX_PRIORITY_HIGH:
+ return AMDGPU_CTX_PRIORITY_HIGH;
+ case RADEON_CTX_PRIORITY_MEDIUM:
+ return AMDGPU_CTX_PRIORITY_NORMAL;
+ case RADEON_CTX_PRIORITY_LOW:
+ return AMDGPU_CTX_PRIORITY_LOW;
+ default:
+ unreachable("Invalid context priority");
+ }
+}
+
+static struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws,
+ enum radeon_ctx_priority priority)
{
struct amdgpu_ctx *ctx = CALLOC_STRUCT(amdgpu_ctx);
int r;
struct amdgpu_bo_alloc_request alloc_buffer = {};
+ uint32_t amdgpu_priority = radeon_to_amdgpu_priority(priority);
amdgpu_bo_handle buf_handle;
if (!ctx)
@@ -287,9 +306,9 @@ static struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws)
ctx->refcount = 1;
ctx->initial_num_total_rejected_cs = ctx->ws->num_total_rejected_cs;
- r = amdgpu_cs_ctx_create(ctx->ws->dev, &ctx->ctx);
+ r = amdgpu_cs_ctx_create2(ctx->ws->dev, amdgpu_priority, &ctx->ctx);
if (r) {
- fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", r);
+ fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create2 failed. (%i)\n", r);
goto error_create;
}
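
For callers of the new interface, context creation now routes a radeon_ctx_priority through the table above into amdgpu_cs_ctx_create2(). A hedged usage sketch against plain libdrm (not part of the diff; the device handle is assumed to come from amdgpu_device_initialize(), error handling is minimal, and elevated priorities may still require extra privileges on some kernels):

   /* Illustrative sketch, not part of the diff: creating a high-priority
    * context directly through libdrm, the way the hunk above does after
    * mapping the radeon priority enum. */
   #include <amdgpu.h>
   #include <amdgpu_drm.h>
   #include <stdio.h>

   static int create_high_priority_ctx(amdgpu_device_handle dev,
                                       amdgpu_context_handle *ctx)
   {
      /* RADEON_CTX_PRIORITY_HIGH maps to AMDGPU_CTX_PRIORITY_HIGH above. */
      int r = amdgpu_cs_ctx_create2(dev, AMDGPU_CTX_PRIORITY_HIGH, ctx);
      if (r)
         fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create2 failed. (%i)\n", r);
      return r;
   }
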
@@ -389,7 +408,7 @@ amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx, bool full_reset_o
if (ctx->ws->num_total_rejected_cs > ctx->initial_num_total_rejected_cs) {
if (needs_reset)
*needs_reset = true;
- return ctx->num_rejected_cs ? PIPE_GUILTY_CONTEXT_RESET :
+ return ctx->rejected_any_cs ? PIPE_GUILTY_CONTEXT_RESET :
PIPE_INNOCENT_CONTEXT_RESET;
}
if (needs_reset)
@@ -714,18 +733,20 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws,
buffer_size = MIN2(buffer_size, max_size);
buffer_size = MAX2(buffer_size, min_size); /* min_size is more important */
- enum radeon_bo_domain domain;
+ /* Use cached GTT for command buffers. Writing to other heaps is very slow on the CPU.
+ * The speed of writing to GTT WC is somewhere between no difference and very slow, while
+ * VRAM being very slow a lot more often.
+ */
+ enum radeon_bo_domain domain = RADEON_DOMAIN_GTT;
unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING;
- if (cs->ring_type == RING_GFX ||
- cs->ring_type == RING_COMPUTE ||
- cs->ring_type == RING_DMA) {
- domain = ws->info.smart_access_memory ? RADEON_DOMAIN_VRAM : RADEON_DOMAIN_GTT;
- flags |= RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC;
- } else {
- /* UVD/VCE */
- /* TODO: validate that UVD/VCE don't read from IBs and enable WC or even VRAM. */
- domain = RADEON_DOMAIN_GTT;
+ if (cs->ip_type == AMD_IP_GFX ||
+ cs->ip_type == AMD_IP_COMPUTE ||
+ cs->ip_type == AMD_IP_SDMA) {
+ /* Avoids hangs with "rendercheck -t cacomposite -f a8r8g8b8" via glamor
+ * on Navi 14
+ */
+ flags |= RADEON_FLAG_32BIT;
}
pb = amdgpu_bo_create(ws, buffer_size,
@@ -812,7 +833,8 @@ static void amdgpu_set_ib_size(struct radeon_cmdbuf *rcs, struct amdgpu_ib *ib)
{
if (ib->ptr_ib_size_inside_ib) {
*ib->ptr_ib_size = rcs->current.cdw |
- S_3F2_CHAIN(1) | S_3F2_VALID(1);
+ S_3F2_CHAIN(1) | S_3F2_VALID(1) |
+ S_3F2_PRE_ENA(((struct amdgpu_cs*)ib)->preamble_ib_bo != NULL);
} else {
*ib->ptr_ib_size = rcs->current.cdw;
}
@@ -829,40 +851,40 @@ static void amdgpu_ib_finalize(struct amdgpu_winsys *ws, struct radeon_cmdbuf *r
static bool amdgpu_init_cs_context(struct amdgpu_winsys *ws,
struct amdgpu_cs_context *cs,
- enum ring_type ring_type)
+ enum amd_ip_type ip_type)
{
- switch (ring_type) {
- case RING_DMA:
+ switch (ip_type) {
+ case AMD_IP_SDMA:
cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_DMA;
break;
- case RING_UVD:
+ case AMD_IP_UVD:
cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD;
break;
- case RING_UVD_ENC:
+ case AMD_IP_UVD_ENC:
cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD_ENC;
break;
- case RING_VCE:
+ case AMD_IP_VCE:
cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCE;
break;
- case RING_VCN_DEC:
+ case AMD_IP_VCN_DEC:
cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_DEC;
break;
- case RING_VCN_ENC:
+ case AMD_IP_VCN_ENC:
cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_ENC;
break;
- case RING_VCN_JPEG:
+ case AMD_IP_VCN_JPEG:
cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_JPEG;
break;
- case RING_COMPUTE:
- case RING_GFX:
- cs->ib[IB_MAIN].ip_type = ring_type == RING_GFX ? AMDGPU_HW_IP_GFX :
+ case AMD_IP_COMPUTE:
+ case AMD_IP_GFX:
+ cs->ib[IB_MAIN].ip_type = ip_type == AMD_IP_GFX ? AMDGPU_HW_IP_GFX :
AMDGPU_HW_IP_COMPUTE;
/* The kernel shouldn't invalidate L2 and vL1. The proper place for cache
@@ -873,14 +895,19 @@ static bool amdgpu_init_cs_context(struct amdgpu_winsys *ws,
* the next IB starts drawing, and so the cache flush at the end of IB
* is always late.
*/
- if (ws->info.drm_minor >= 26)
+ if (ws->info.drm_minor >= 26) {
+ cs->ib[IB_PREAMBLE].flags = AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE;
cs->ib[IB_MAIN].flags = AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE;
+ }
break;
default:
assert(0);
}
+ cs->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAG_PREAMBLE;
+ cs->ib[IB_PREAMBLE].ip_type = cs->ib[IB_MAIN].ip_type;
+
cs->last_added_bo = NULL;
return true;
}
@@ -931,11 +958,11 @@ static void amdgpu_destroy_cs_context(struct amdgpu_winsys *ws, struct amdgpu_cs
static bool
amdgpu_cs_create(struct radeon_cmdbuf *rcs,
struct radeon_winsys_ctx *rwctx,
- enum ring_type ring_type,
+ enum amd_ip_type ip_type,
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence),
void *flush_ctx,
- bool stop_exec_on_failure)
+ bool allow_context_lost)
{
struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx;
struct amdgpu_cs *cs;
@@ -951,25 +978,25 @@ amdgpu_cs_create(struct radeon_cmdbuf *rcs,
cs->ctx = ctx;
cs->flush_cs = flush;
cs->flush_data = flush_ctx;
- cs->ring_type = ring_type;
- cs->stop_exec_on_failure = stop_exec_on_failure;
+ cs->ip_type = ip_type;
+ cs->allow_context_lost = allow_context_lost;
cs->noop = ctx->ws->noop_cs;
- cs->has_chaining = ctx->ws->info.chip_class >= GFX7 &&
- (ring_type == RING_GFX || ring_type == RING_COMPUTE);
+ cs->has_chaining = ctx->ws->info.gfx_level >= GFX7 &&
+ (ip_type == AMD_IP_GFX || ip_type == AMD_IP_COMPUTE);
struct amdgpu_cs_fence_info fence_info;
fence_info.handle = cs->ctx->user_fence_bo;
- fence_info.offset = cs->ring_type * 4;
+ fence_info.offset = cs->ip_type * 4;
amdgpu_cs_chunk_fence_info_to_data(&fence_info, (void*)&cs->fence_chunk);
cs->main.ib_type = IB_MAIN;
- if (!amdgpu_init_cs_context(ctx->ws, &cs->csc1, ring_type)) {
+ if (!amdgpu_init_cs_context(ctx->ws, &cs->csc1, ip_type)) {
FREE(cs);
return false;
}
- if (!amdgpu_init_cs_context(ctx->ws, &cs->csc2, ring_type)) {
+ if (!amdgpu_init_cs_context(ctx->ws, &cs->csc2, ip_type)) {
amdgpu_destroy_cs_context(ctx->ws, &cs->csc1);
FREE(cs);
return false;
@@ -1003,6 +1030,13 @@ amdgpu_cs_create(struct radeon_cmdbuf *rcs,
return true;
}
+static void amdgpu_cs_set_preamble(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
+ unsigned preamble_num_dw, bool preamble_changed)
+{
+ /* TODO: implement this properly */
+ radeon_emit_array(cs, preamble_ib, preamble_num_dw);
+}
+
static bool
amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib,
unsigned preamble_num_dw)
@@ -1034,14 +1068,12 @@ amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_i
memcpy(map, preamble_ib, preamble_num_dw * 4);
/* Pad the IB. */
- uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ring_type];
+ uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ip_type];
while (preamble_num_dw & ib_pad_dw_mask)
map[preamble_num_dw++] = PKT3_NOP_PAD;
amdgpu_bo_unmap(&ws->dummy_ws.base, preamble_bo);
for (unsigned i = 0; i < 2; i++) {
- csc[i]->ib[IB_PREAMBLE] = csc[i]->ib[IB_MAIN];
- csc[i]->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAG_PREAMBLE;
csc[i]->ib[IB_PREAMBLE].va_start = amdgpu_winsys_bo(preamble_bo)->va;
csc[i]->ib[IB_PREAMBLE].ib_bytes = preamble_num_dw * 4;
@@ -1112,7 +1144,7 @@ static bool amdgpu_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw)
rcs->current.max_dw += cs_epilog_dw;
/* Pad with NOPs but leave 4 dwords for INDIRECT_BUFFER. */
- uint32_t ib_pad_dw_mask = cs->ws->info.ib_pad_dw_mask[cs->ring_type];
+ uint32_t ib_pad_dw_mask = cs->ws->info.ib_pad_dw_mask[cs->ip_type];
while ((rcs->current.cdw & ib_pad_dw_mask) != ib_pad_dw_mask - 3)
radeon_emit(rcs, PKT3_NOP_PAD);
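
The stop condition above leaves the write pointer exactly 4 dwords short of the next alignment boundary, which is the size of the INDIRECT_BUFFER packet that chaining appends. A tiny self-checking sketch (not part of the diff; the mask value 7 is an assumption):

   /* Illustrative sketch, not part of the diff: the padding loop stops 4
    * dwords short of the next alignment boundary so a 4-dword
    * INDIRECT_BUFFER packet ends exactly on it. */
   #include <assert.h>

   int main(void)
   {
      const unsigned ib_pad_dw_mask = 7;  /* align to 8 dwords (hypothetical) */
      unsigned cdw = 42;                  /* current dword count */

      while ((cdw & ib_pad_dw_mask) != ib_pad_dw_mask - 3)
         cdw++;                           /* one NOP dword per iteration */

      assert((cdw + 4) % (ib_pad_dw_mask + 1) == 0); /* packet ends on the boundary */
      return 0;
   }
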
@@ -1194,8 +1226,8 @@ static bool is_noop_fence_dependency(struct amdgpu_cs *acs,
* We always want no dependency between back-to-back gfx IBs, because
* we need the parallelism between IBs for good performance.
*/
- if ((acs->ring_type == RING_GFX ||
- acs->ws->info.num_rings[acs->ring_type] == 1) &&
+ if ((acs->ip_type == AMD_IP_GFX ||
+ acs->ws->info.ip[acs->ip_type].num_queues == 1) &&
!amdgpu_fence_is_syncobj(fence) &&
fence->ctx == acs->ctx &&
fence->fence.ip_type == cs->ib[IB_MAIN].ip_type)
@@ -1455,163 +1487,164 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
}
}
- if (acs->ring_type == RING_GFX)
+ if (acs->ip_type == AMD_IP_GFX)
ws->gfx_bo_list_counter += cs->num_real_buffers;
- bool noop = false;
+ struct drm_amdgpu_cs_chunk chunks[7];
+ unsigned num_chunks = 0;
- if (acs->stop_exec_on_failure && acs->ctx->num_rejected_cs) {
- r = -ECANCELED;
- } else {
- struct drm_amdgpu_cs_chunk chunks[7];
- unsigned num_chunks = 0;
-
- /* BO list */
- if (!use_bo_list_create) {
- chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES;
- chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_bo_list_in) / 4;
- chunks[num_chunks].chunk_data = (uintptr_t)&bo_list_in;
- num_chunks++;
- }
-
- /* Fence dependencies. */
- unsigned num_dependencies = cs->fence_dependencies.num;
- if (num_dependencies) {
- struct drm_amdgpu_cs_chunk_dep *dep_chunk =
- alloca(num_dependencies * sizeof(*dep_chunk));
+ /* BO list */
+ if (!use_bo_list_create) {
+ chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES;
+ chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_bo_list_in) / 4;
+ chunks[num_chunks].chunk_data = (uintptr_t)&bo_list_in;
+ num_chunks++;
+ }
- for (unsigned i = 0; i < num_dependencies; i++) {
- struct amdgpu_fence *fence =
- (struct amdgpu_fence*)cs->fence_dependencies.list[i];
+ /* Fence dependencies. */
+ unsigned num_dependencies = cs->fence_dependencies.num;
+ if (num_dependencies) {
+ struct drm_amdgpu_cs_chunk_dep *dep_chunk =
+ alloca(num_dependencies * sizeof(*dep_chunk));
- assert(util_queue_fence_is_signalled(&fence->submitted));
- amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]);
- }
+ for (unsigned i = 0; i < num_dependencies; i++) {
+ struct amdgpu_fence *fence =
+ (struct amdgpu_fence*)cs->fence_dependencies.list[i];
- chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
- chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num_dependencies;
- chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk;
- num_chunks++;
+ assert(util_queue_fence_is_signalled(&fence->submitted));
+ amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]);
}
- /* Syncobj dependencies. */
- unsigned num_syncobj_dependencies = cs->syncobj_dependencies.num;
- if (num_syncobj_dependencies) {
- struct drm_amdgpu_cs_chunk_sem *sem_chunk =
- alloca(num_syncobj_dependencies * sizeof(sem_chunk[0]));
+ chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
+ chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num_dependencies;
+ chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk;
+ num_chunks++;
+ }
- for (unsigned i = 0; i < num_syncobj_dependencies; i++) {
- struct amdgpu_fence *fence =
- (struct amdgpu_fence*)cs->syncobj_dependencies.list[i];
+ /* Syncobj dependencies. */
+ unsigned num_syncobj_dependencies = cs->syncobj_dependencies.num;
+ if (num_syncobj_dependencies) {
+ struct drm_amdgpu_cs_chunk_sem *sem_chunk =
+ alloca(num_syncobj_dependencies * sizeof(sem_chunk[0]));
- if (!amdgpu_fence_is_syncobj(fence))
- continue;
+ for (unsigned i = 0; i < num_syncobj_dependencies; i++) {
+ struct amdgpu_fence *fence =
+ (struct amdgpu_fence*)cs->syncobj_dependencies.list[i];
- assert(util_queue_fence_is_signalled(&fence->submitted));
- sem_chunk[i].handle = fence->syncobj;
- }
+ if (!amdgpu_fence_is_syncobj(fence))
+ continue;
- chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN;
- chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4 * num_syncobj_dependencies;
- chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk;
- num_chunks++;
+ assert(util_queue_fence_is_signalled(&fence->submitted));
+ sem_chunk[i].handle = fence->syncobj;
}
- /* Syncobj signals. */
- unsigned num_syncobj_to_signal = cs->syncobj_to_signal.num;
- if (num_syncobj_to_signal) {
- struct drm_amdgpu_cs_chunk_sem *sem_chunk =
- alloca(num_syncobj_to_signal * sizeof(sem_chunk[0]));
+ chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN;
+ chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4 * num_syncobj_dependencies;
+ chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk;
+ num_chunks++;
+ }
- for (unsigned i = 0; i < num_syncobj_to_signal; i++) {
- struct amdgpu_fence *fence =
- (struct amdgpu_fence*)cs->syncobj_to_signal.list[i];
+ /* Syncobj signals. */
+ unsigned num_syncobj_to_signal = cs->syncobj_to_signal.num;
+ if (num_syncobj_to_signal) {
+ struct drm_amdgpu_cs_chunk_sem *sem_chunk =
+ alloca(num_syncobj_to_signal * sizeof(sem_chunk[0]));
- assert(amdgpu_fence_is_syncobj(fence));
- sem_chunk[i].handle = fence->syncobj;
- }
+ for (unsigned i = 0; i < num_syncobj_to_signal; i++) {
+ struct amdgpu_fence *fence =
+ (struct amdgpu_fence*)cs->syncobj_to_signal.list[i];
- chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT;
- chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4
- * num_syncobj_to_signal;
- chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk;
- num_chunks++;
+ assert(amdgpu_fence_is_syncobj(fence));
+ sem_chunk[i].handle = fence->syncobj;
}
- /* Fence */
- if (has_user_fence) {
- chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_FENCE;
- chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
- chunks[num_chunks].chunk_data = (uintptr_t)&acs->fence_chunk;
- num_chunks++;
- }
+ chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT;
+ chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4
+ * num_syncobj_to_signal;
+ chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk;
+ num_chunks++;
+ }
- /* IB */
- if (cs->ib[IB_PREAMBLE].ib_bytes) {
- chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
- chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
- chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_PREAMBLE];
- num_chunks++;
- }
+ /* Fence */
+ if (has_user_fence) {
+ chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_FENCE;
+ chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
+ chunks[num_chunks].chunk_data = (uintptr_t)&acs->fence_chunk;
+ num_chunks++;
+ }
- /* IB */
- cs->ib[IB_MAIN].ib_bytes *= 4; /* Convert from dwords to bytes. */
+ /* IB */
+ if (cs->ib[IB_PREAMBLE].ib_bytes) {
chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
- chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_MAIN];
+ chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_PREAMBLE];
num_chunks++;
+ }
- if (cs->secure) {
- cs->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAGS_SECURE;
- cs->ib[IB_MAIN].flags |= AMDGPU_IB_FLAGS_SECURE;
- } else {
- cs->ib[IB_PREAMBLE].flags &= ~AMDGPU_IB_FLAGS_SECURE;
- cs->ib[IB_MAIN].flags &= ~AMDGPU_IB_FLAGS_SECURE;
- }
+ /* IB */
+ cs->ib[IB_MAIN].ib_bytes *= 4; /* Convert from dwords to bytes. */
+ chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
+ chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
+ chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_MAIN];
+ num_chunks++;
- /* Apply RADEON_NOOP. */
- if (acs->noop) {
- if (acs->ring_type == RING_GFX) {
- /* Reduce the IB size and fill it with NOP to make it like an empty IB. */
- unsigned noop_size = MIN2(cs->ib[IB_MAIN].ib_bytes, ws->info.ib_alignment);
+ if (cs->secure) {
+ cs->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAGS_SECURE;
+ cs->ib[IB_MAIN].flags |= AMDGPU_IB_FLAGS_SECURE;
+ } else {
+ cs->ib[IB_PREAMBLE].flags &= ~AMDGPU_IB_FLAGS_SECURE;
+ cs->ib[IB_MAIN].flags &= ~AMDGPU_IB_FLAGS_SECURE;
+ }
- cs->ib_main_addr[0] = PKT3(PKT3_NOP, noop_size / 4 - 2, 0);
- cs->ib[IB_MAIN].ib_bytes = noop_size;
- } else {
- noop = true;
- }
- }
+ bool noop = acs->noop;
- assert(num_chunks <= ARRAY_SIZE(chunks));
+ if (noop && acs->ip_type == AMD_IP_GFX) {
+ /* Reduce the IB size and fill it with NOP to make it like an empty IB. */
+ unsigned noop_size = MIN2(cs->ib[IB_MAIN].ib_bytes, ws->info.ib_alignment);
- r = noop ? 0 : amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list,
- num_chunks, chunks, &seq_no);
+ cs->ib_main_addr[0] = PKT3(PKT3_NOP, noop_size / 4 - 2, 0);
+ cs->ib[IB_MAIN].ib_bytes = noop_size;
+ noop = false;
}
- if (r) {
- if (r == -ENOMEM)
- fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
- else if (r == -ECANCELED)
- fprintf(stderr, "amdgpu: The CS has been cancelled because the context is lost.\n");
- else
- fprintf(stderr, "amdgpu: The CS has been rejected, "
- "see dmesg for more information (%i).\n", r);
-
- acs->ctx->num_rejected_cs++;
- ws->num_total_rejected_cs++;
- } else if (!noop) {
- /* Success. */
- uint64_t *user_fence = NULL;
-
- /* Need to reserve 4 QWORD for user fence:
- * QWORD[0]: completed fence
- * QWORD[1]: preempted fence
- * QWORD[2]: reset fence
- * QWORD[3]: preempted then reset
- **/
- if (has_user_fence)
- user_fence = acs->ctx->user_fence_cpu_address_base + acs->ring_type * 4;
- amdgpu_fence_submitted(cs->fence, seq_no, user_fence);
+ assert(num_chunks <= ARRAY_SIZE(chunks));
+
+ if (unlikely(acs->ctx->rejected_any_cs)) {
+ r = -ECANCELED;
+ } else if (unlikely(noop)) {
+ r = 0;
+ } else {
+ /* Submit the command buffer.
+ *
+ * The kernel returns -ENOMEM with many parallel processes using GDS such as test suites
+ * quite often, but it eventually succeeds after enough attempts. This happens frequently
+ * with dEQP using NGG streamout.
+ */
+ r = 0;
+
+ do {
+ /* Wait 1 ms and try again. */
+ if (r == -ENOMEM)
+ os_time_sleep(1000);
+
+ r = amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list,
+ num_chunks, chunks, &seq_no);
+ } while (r == -ENOMEM);
+
+ if (!r) {
+ /* Success. */
+ uint64_t *user_fence = NULL;
+
+ /* Need to reserve 4 QWORD for user fence:
+ * QWORD[0]: completed fence
+ * QWORD[1]: preempted fence
+ * QWORD[2]: reset fence
+ * QWORD[3]: preempted then reset
+ */
+ if (has_user_fence)
+ user_fence = acs->ctx->user_fence_cpu_address_base + acs->ip_type * 4;
+ amdgpu_fence_submitted(cs->fence, seq_no, user_fence);
+ }
}
/* Cleanup. */
@@ -1619,6 +1652,23 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
amdgpu_bo_list_destroy_raw(ws->dev, bo_list);
cleanup:
+ if (unlikely(r)) {
+ if (!acs->allow_context_lost) {
+ /* Non-robust contexts are allowed to terminate the process. The only alternative is
+ * to skip command submission, which would look like a freeze because nothing is drawn,
+ * which is not a useful state to be in under any circumstances.
+ */
+ fprintf(stderr, "amdgpu: The CS has been rejected (%i), but the context isn't robust.\n", r);
+ fprintf(stderr, "amdgpu: The process will be terminated.\n");
+ exit(1);
+ }
+
+ fprintf(stderr, "amdgpu: The CS has been rejected (%i). Recreate the context.\n", r);
+ if (!acs->ctx->rejected_any_cs)
+ ws->num_total_rejected_cs++;
+ acs->ctx->rejected_any_cs = true;
+ }
+
/* If there was an error, signal the fence, because it won't be signalled
* by the hardware. */
if (r || noop)
@@ -1653,14 +1703,14 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
struct amdgpu_cs *cs = amdgpu_cs(rcs);
struct amdgpu_winsys *ws = cs->ws;
int error_code = 0;
- uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ring_type];
+ uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ip_type];
rcs->current.max_dw += amdgpu_cs_epilog_dws(cs);
/* Pad the IB according to the mask. */
- switch (cs->ring_type) {
- case RING_DMA:
- if (ws->info.chip_class <= GFX6) {
+ switch (cs->ip_type) {
+ case AMD_IP_SDMA:
+ if (ws->info.gfx_level <= GFX6) {
while (rcs->current.cdw & ib_pad_dw_mask)
radeon_emit(rcs, 0xf0000000); /* NOP packet */
} else {
@@ -1668,8 +1718,8 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
radeon_emit(rcs, SDMA_NOP_PAD);
}
break;
- case RING_GFX:
- case RING_COMPUTE:
+ case AMD_IP_GFX:
+ case AMD_IP_COMPUTE:
if (ws->info.gfx_ib_pad_with_type2) {
while (rcs->current.cdw & ib_pad_dw_mask)
radeon_emit(rcs, PKT2_NOP_PAD);
@@ -1677,15 +1727,15 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
while (rcs->current.cdw & ib_pad_dw_mask)
radeon_emit(rcs, PKT3_NOP_PAD);
}
- if (cs->ring_type == RING_GFX)
+ if (cs->ip_type == AMD_IP_GFX)
ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4;
break;
- case RING_UVD:
- case RING_UVD_ENC:
+ case AMD_IP_UVD:
+ case AMD_IP_UVD_ENC:
while (rcs->current.cdw & ib_pad_dw_mask)
radeon_emit(rcs, 0x80000000); /* type2 nop packet */
break;
- case RING_VCN_JPEG:
+ case AMD_IP_VCN_JPEG:
if (rcs->current.cdw % 2)
assert(0);
while (rcs->current.cdw & ib_pad_dw_mask) {
@@ -1693,7 +1743,7 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
radeon_emit(rcs, 0x00000000);
}
break;
- case RING_VCN_DEC:
+ case AMD_IP_VCN_DEC:
while (rcs->current.cdw & ib_pad_dw_mask)
radeon_emit(rcs, 0x81ff); /* nop packet */
break;
@@ -1768,9 +1818,9 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
rcs->used_gart_kb = 0;
rcs->used_vram_kb = 0;
- if (cs->ring_type == RING_GFX)
+ if (cs->ip_type == AMD_IP_GFX)
ws->num_gfx_IBs++;
- else if (cs->ring_type == RING_DMA)
+ else if (cs->ip_type == AMD_IP_SDMA)
ws->num_sdma_IBs++;
return error_code;
@@ -1811,6 +1861,7 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws)
ws->base.ctx_destroy = amdgpu_ctx_destroy;
ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
ws->base.cs_create = amdgpu_cs_create;
+ ws->base.cs_set_preamble = amdgpu_cs_set_preamble;
ws->base.cs_setup_preemption = amdgpu_cs_setup_preemption;
ws->base.cs_destroy = amdgpu_cs_destroy;
ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index 794d13bd0..13b8bf73d 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -45,7 +45,7 @@ struct amdgpu_ctx {
uint64_t *user_fence_cpu_address_base;
int refcount;
unsigned initial_num_total_rejected_cs;
- unsigned num_rejected_cs;
+ bool rejected_any_cs;
};
struct amdgpu_cs_buffer {
@@ -131,7 +131,7 @@ struct amdgpu_cs {
struct amdgpu_ib main; /* must be first because this is inherited */
struct amdgpu_winsys *ws;
struct amdgpu_ctx *ctx;
- enum ring_type ring_type;
+ enum amd_ip_type ip_type;
struct drm_amdgpu_cs_chunk_fence fence_chunk;
/* We flip between these two CS. While one is being consumed
@@ -154,7 +154,7 @@ struct amdgpu_cs {
/* Flush CS. */
void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
void *flush_data;
- bool stop_exec_on_failure;
+ bool allow_context_lost;
bool noop;
bool has_chaining;
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_public.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_public.h
deleted file mode 100644
index f403ed997..000000000
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_public.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright © 2015 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
- * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- */
-
-#ifndef AMDGPU_PUBLIC_H
-#define AMDGPU_PUBLIC_H
-
-#include "pipe/p_defines.h"
-#include "gallium/winsys/radeon/drm/radeon_drm_public.h"
-
-struct radeon_winsys;
-struct pipe_screen;
-struct pipe_screen_config;
-
-struct radeon_winsys *
-amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
- radeon_screen_create_t screen_create);
-
-#endif
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index cc71b0e92..05ff784d3 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -28,7 +28,6 @@
*/
#include "amdgpu_cs.h"
-#include "amdgpu_public.h"
#include "util/os_file.h"
#include "util/os_misc.h"
@@ -45,7 +44,7 @@
#include "sid.h"
static struct hash_table *dev_tab = NULL;
-static simple_mtx_t dev_tab_mutex = _SIMPLE_MTX_INITIALIZER_NP;
+static simple_mtx_t dev_tab_mutex = SIMPLE_MTX_INITIALIZER;
#if DEBUG
DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false)
@@ -61,23 +60,25 @@ static void handle_env_var_force_family(struct amdgpu_winsys *ws)
for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
if (!strcmp(family, ac_get_llvm_processor_name(i))) {
- /* Override family and chip_class. */
+ /* Override family and gfx_level. */
ws->info.family = i;
ws->info.name = "NOOP";
strcpy(ws->info.lowercase_name , "noop");
- if (i >= CHIP_SIENNA_CICHLID)
- ws->info.chip_class = GFX10_3;
+ if (i >= CHIP_GFX1100)
+ ws->info.gfx_level = GFX11;
+ else if (i >= CHIP_NAVI21)
+ ws->info.gfx_level = GFX10_3;
else if (i >= CHIP_NAVI10)
- ws->info.chip_class = GFX10;
+ ws->info.gfx_level = GFX10;
else if (i >= CHIP_VEGA10)
- ws->info.chip_class = GFX9;
+ ws->info.gfx_level = GFX9;
else if (i >= CHIP_TONGA)
- ws->info.chip_class = GFX8;
+ ws->info.gfx_level = GFX8;
else if (i >= CHIP_BONAIRE)
- ws->info.chip_class = GFX7;
+ ws->info.gfx_level = GFX7;
else
- ws->info.chip_class = GFX6;
+ ws->info.gfx_level = GFX6;
/* Don't submit any IBs. */
setenv("RADEON_NOOP", "1", 1);
@@ -94,7 +95,7 @@ static bool do_winsys_init(struct amdgpu_winsys *ws,
const struct pipe_screen_config *config,
int fd)
{
- if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo))
+ if (!ac_query_gpu_info(fd, ws->dev, &ws->info))
goto fail;
/* TODO: Enable this once the kernel handles it efficiently. */
@@ -362,6 +363,34 @@ static bool amdgpu_cs_is_secure(struct radeon_cmdbuf *rcs)
return cs->csc->secure;
}
+static uint32_t
+radeon_to_amdgpu_pstate(enum radeon_ctx_pstate pstate)
+{
+ switch (pstate) {
+ case RADEON_CTX_PSTATE_NONE:
+ return AMDGPU_CTX_STABLE_PSTATE_NONE;
+ case RADEON_CTX_PSTATE_STANDARD:
+ return AMDGPU_CTX_STABLE_PSTATE_STANDARD;
+ case RADEON_CTX_PSTATE_MIN_SCLK:
+ return AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
+ case RADEON_CTX_PSTATE_MIN_MCLK:
+ return AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
+ case RADEON_CTX_PSTATE_PEAK:
+ return AMDGPU_CTX_STABLE_PSTATE_PEAK;
+ default:
+ unreachable("Invalid pstate");
+ }
+}
+
+static bool
+amdgpu_cs_set_pstate(struct radeon_cmdbuf *rcs, enum radeon_ctx_pstate pstate)
+{
+ struct amdgpu_cs *cs = amdgpu_cs(rcs);
+ uint32_t amdgpu_pstate = radeon_to_amdgpu_pstate(pstate);
+ return amdgpu_cs_ctx_stable_pstate(cs->ctx->ctx,
+ AMDGPU_CTX_OP_SET_STABLE_PSTATE, amdgpu_pstate, NULL) == 0;
+}
+
PUBLIC struct radeon_winsys *
amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
radeon_screen_create_t screen_create)
@@ -450,9 +479,9 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
goto fail_alloc;
/* Create managers. */
- pb_cache_init(&aws->bo_cache, RADEON_MAX_CACHED_HEAPS,
+ pb_cache_init(&aws->bo_cache, RADEON_NUM_HEAPS,
500000, aws->check_vm ? 1.0f : 2.0f, 0,
- (aws->info.vram_size + aws->info.gart_size) / 8, aws,
+ ((uint64_t)aws->info.vram_size_kb + aws->info.gart_size_kb) * 1024 / 8, aws,
/* Cast to void* because one of the function parameters
* is a struct pointer instead of void*. */
(void*)amdgpu_bo_destroy, (void*)amdgpu_bo_can_reclaim);
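
The cache limit is now derived from the KB-based vram_size_kb/gart_size_kb fields, and the uint64_t cast before the addition is what keeps the byte conversion from wrapping in 32-bit arithmetic. A small sketch with made-up sizes (not part of the diff):

   /* Illustrative sketch, not part of the diff: why the uint64_t cast matters
    * when turning the KB-based sizes back into a byte limit. Sizes are
    * made up (24 GiB VRAM + 16 GiB GART). */
   #include <stdint.h>
   #include <stdio.h>

   int main(void)
   {
      uint32_t vram_size_kb = 24u * 1024 * 1024; /* 24 GiB in KiB */
      uint32_t gart_size_kb = 16u * 1024 * 1024; /* 16 GiB in KiB */

      uint64_t with_cast = ((uint64_t)vram_size_kb + gart_size_kb) * 1024 / 8;
      uint64_t no_cast   = (uint64_t)((vram_size_kb + gart_size_kb) * 1024) / 8;

      printf("with cast:    %llu bytes\n", (unsigned long long)with_cast); /* 5 GiB */
      printf("without cast: %llu bytes\n", (unsigned long long)no_cast);   /* 32-bit multiply wraps to 0 */
      return 0;
   }
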
@@ -470,25 +499,10 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
if (!pb_slabs_init(&aws->bo_slabs[i],
min_order, max_order,
- RADEON_MAX_SLAB_HEAPS, true,
- aws,
- amdgpu_bo_can_reclaim_slab,
- amdgpu_bo_slab_alloc_normal,
- /* Cast to void* because one of the function parameters
- * is a struct pointer instead of void*. */
- (void*)amdgpu_bo_slab_free)) {
- amdgpu_winsys_destroy(&ws->base);
- simple_mtx_unlock(&dev_tab_mutex);
- return NULL;
- }
-
- if (aws->info.has_tmz_support &&
- !pb_slabs_init(&aws->bo_slabs_encrypted[i],
- min_order, max_order,
- RADEON_MAX_SLAB_HEAPS, true,
+ RADEON_NUM_HEAPS, true,
aws,
amdgpu_bo_can_reclaim_slab,
- amdgpu_bo_slab_alloc_encrypted,
+ amdgpu_bo_slab_alloc,
/* Cast to void* because one of the function parameters
* is a struct pointer instead of void*. */
(void*)amdgpu_bo_slab_free)) {
@@ -546,6 +560,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
ws->base.read_registers = amdgpu_read_registers;
ws->base.pin_threads_to_L3_cache = amdgpu_pin_threads_to_L3_cache;
ws->base.cs_is_secure = amdgpu_cs_is_secure;
+ ws->base.cs_set_pstate = amdgpu_cs_set_pstate;
amdgpu_bo_init_functions(ws);
amdgpu_cs_init_functions(ws);
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
index 26e81f94d..1bff953a1 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
@@ -30,7 +30,7 @@
#include "pipebuffer/pb_cache.h"
#include "pipebuffer/pb_slab.h"
-#include "gallium/drivers/radeon/radeon_winsys.h"
+#include "winsys/radeon_winsys.h"
#include "util/simple_mtx.h"
#include "util/u_queue.h"
#include <amdgpu.h>
@@ -64,7 +64,6 @@ struct amdgpu_winsys {
* need to layer the allocators, so that we don't waste too much memory.
*/
struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS];
- struct pb_slabs bo_slabs_encrypted[NUM_SLAB_ALLOCATORS];
amdgpu_device_handle dev;
@@ -93,7 +92,6 @@ struct amdgpu_winsys {
/* multithreaded IB submission */
struct util_queue cs_queue;
- struct amdgpu_gpu_info amdinfo;
struct ac_addrlib *addrlib;
bool check_vm;