| author | Jonathan Gray <jsg@cvs.openbsd.org> | 2023-01-28 08:56:54 +0000 |
| --- | --- | --- |
| committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2023-01-28 08:56:54 +0000 |
| commit | d305570c9b1fd87c4acdec589761cfa39fd04a3b (patch) | |
| tree | e340315dd9d6966ccc3a48aa7a845e2213e40e62 /lib/mesa/src/gallium/winsys/amdgpu | |
| parent | 1c5c7896c1d54abd25c0f33ca996165b359eecb3 (diff) | |
Merge Mesa 22.3.4
Diffstat (limited to 'lib/mesa/src/gallium/winsys/amdgpu')
7 files changed, 322 insertions, 337 deletions
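The largest buffer-management change in the amdgpu_bo.c hunks below folds the encrypted slab pool into the single bo_slabs array, so get_slabs() now picks an allocator purely by size: the first tier whose largest entry order covers the request. The following standalone sketch illustrates that selection rule; the tier parameters here are invented for illustration, while the real orders are configured in amdgpu_winsys_create() (see the pb_slabs_init() call further down).

```c
/*
 * Illustrative sketch of the slab-tier selection used by get_slabs() in the
 * amdgpu_bo.c hunk below: tier i covers entry sizes up to
 * 1 << (min_order + num_orders - 1), and the first tier large enough wins.
 * The tier parameters are made-up examples, not Mesa's configuration.
 */
#include <stdint.h>
#include <stdio.h>

#define NUM_TIERS 3

struct slab_tier {
   unsigned min_order;   /* log2 of the smallest entry size in this tier */
   unsigned num_orders;  /* number of power-of-two entry sizes it serves */
};

/* Hypothetical tiers: up to 4 KiB, up to 64 KiB, up to 1 MiB. */
static const struct slab_tier tiers[NUM_TIERS] = {
   { 8, 5 },   /* covers up to 1 << (8 + 5 - 1)  = 4096 bytes  */
   { 13, 4 },  /* covers up to 1 << (13 + 4 - 1) = 65536 bytes */
   { 17, 4 },  /* covers up to 1 << (17 + 4 - 1) = 1 MiB       */
};

/* Return the index of the first tier whose largest entry covers "size",
 * or -1 if the buffer is too big to sub-allocate from slabs at all. */
static int select_tier(uint64_t size)
{
   for (unsigned i = 0; i < NUM_TIERS; i++) {
      uint64_t max_entry = 1ull << (tiers[i].min_order + tiers[i].num_orders - 1);
      if (size <= max_entry)
         return (int)i;
   }
   return -1;
}

int main(void)
{
   const uint64_t sizes[] = { 512, 4096, 5000, 70000, 1u << 20, 2u << 20 };

   for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
      printf("size %8llu -> tier %d\n",
             (unsigned long long)sizes[i], select_tier(sizes[i]));
   return 0;
}
```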
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index bcba77827..1b53bcc61 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -239,11 +239,8 @@ static void amdgpu_bo_destroy_or_cache(struct radeon_winsys *rws, struct pb_buff static void amdgpu_clean_up_buffer_managers(struct amdgpu_winsys *ws) { - for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) pb_slabs_reclaim(&ws->bo_slabs[i]); - if (ws->info.has_tmz_support) - pb_slabs_reclaim(&ws->bo_slabs_encrypted[i]); - } pb_cache_release_all_buffers(&ws->bo_cache); } @@ -530,9 +527,15 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; if (flags & RADEON_FLAG_GTT_WC) request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; + + if (flags & RADEON_FLAG_DISCARDABLE && + ws->info.drm_minor >= 47) + request.flags |= AMDGPU_GEM_CREATE_DISCARDABLE; + if (ws->zero_all_vram_allocs && (request.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)) request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED; + if ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) { request.flags |= AMDGPU_GEM_CREATE_ENCRYPTED; @@ -574,9 +577,13 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, if (!(flags & RADEON_FLAG_READ_ONLY)) vm_flags |= AMDGPU_VM_PAGE_WRITEABLE; - if (flags & RADEON_FLAG_UNCACHED) + if (flags & RADEON_FLAG_GL2_BYPASS) vm_flags |= AMDGPU_VM_MTYPE_UC; + if (flags & RADEON_FLAG_MALL_NOALLOC && + ws->info.drm_minor >= 47) + vm_flags |= AMDGPU_VM_PAGE_NOALLOC; + r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags, AMDGPU_VA_OP_MAP); if (r) @@ -629,14 +636,11 @@ bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry) return amdgpu_bo_can_reclaim(priv, &bo->base); } -static struct pb_slabs *get_slabs(struct amdgpu_winsys *ws, uint64_t size, - enum radeon_bo_flag flags) +static struct pb_slabs *get_slabs(struct amdgpu_winsys *ws, uint64_t size) { - struct pb_slabs *bo_slabs = ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) ? - ws->bo_slabs_encrypted : ws->bo_slabs; /* Find the correct slab allocator for the given size. 
*/ for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { - struct pb_slabs *slabs = &bo_slabs[i]; + struct pb_slabs *slabs = &ws->bo_slabs[i]; if (size <= 1 << (slabs->min_order + slabs->num_orders - 1)) return slabs; @@ -663,7 +667,7 @@ static void amdgpu_bo_slab_destroy(struct radeon_winsys *rws, struct pb_buffer * assert(!bo->bo); - slabs = get_slabs(ws, bo->base.size, bo->base.usage & RADEON_FLAG_ENCRYPTED); + slabs = get_slabs(ws, bo->base.size); if (bo->base.placement & RADEON_DOMAIN_VRAM) ws->slab_wasted_vram -= get_slab_wasted_size(ws, bo); @@ -699,10 +703,8 @@ static unsigned get_slab_entry_alignment(struct amdgpu_winsys *ws, unsigned size return entry_size; } -static struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, - unsigned entry_size, - unsigned group_index, - bool encrypted) +struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size, + unsigned group_index) { struct amdgpu_winsys *ws = priv; struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab); @@ -714,15 +716,9 @@ static struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, if (!slab) return NULL; - if (encrypted) - flags |= RADEON_FLAG_ENCRYPTED; - - struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) ? - ws->bo_slabs_encrypted : ws->bo_slabs; - /* Determine the slab buffer size. */ for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { - unsigned max_entry_size = 1 << (slabs[i].min_order + slabs[i].num_orders - 1); + unsigned max_entry_size = 1 << (ws->bo_slabs[i].min_order + ws->bo_slabs[i].num_orders - 1); if (entry_size <= max_entry_size) { /* The slab size is twice the size of the largest possible entry. */ @@ -815,20 +811,6 @@ fail: return NULL; } -struct pb_slab *amdgpu_bo_slab_alloc_encrypted(void *priv, unsigned heap, - unsigned entry_size, - unsigned group_index) -{ - return amdgpu_bo_slab_alloc(priv, heap, entry_size, group_index, true); -} - -struct pb_slab *amdgpu_bo_slab_alloc_normal(void *priv, unsigned heap, - unsigned entry_size, - unsigned group_index) -{ - return amdgpu_bo_slab_alloc(priv, heap, entry_size, group_index, false); -} - void amdgpu_bo_slab_free(struct amdgpu_winsys *ws, struct pb_slab *pslab) { struct amdgpu_slab *slab = amdgpu_slab(pslab); @@ -1163,7 +1145,7 @@ amdgpu_bo_sparse_create(struct amdgpu_winsys *ws, uint64_t size, if (r) goto error_va_alloc; - r = amdgpu_bo_va_op_raw(ws->dev, NULL, 0, size, bo->va, + r = amdgpu_bo_va_op_raw(ws->dev, NULL, 0, map_size, bo->va, AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP); if (r) goto error_va_map; @@ -1360,34 +1342,23 @@ amdgpu_bo_create(struct amdgpu_winsys *ws, enum radeon_bo_flag flags) { struct amdgpu_winsys_bo *bo; - int heap = -1; - if (domain & (RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA)) - flags |= RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_SUBALLOC; + radeon_canonicalize_bo_flags(&domain, &flags); - /* VRAM implies WC. This is not optional. */ - assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC); - - /* NO_CPU_ACCESS is not valid with GTT. */ - assert(!(domain & RADEON_DOMAIN_GTT) || !(flags & RADEON_FLAG_NO_CPU_ACCESS)); + /* Handle sparse buffers first. */ + if (flags & RADEON_FLAG_SPARSE) { + assert(RADEON_SPARSE_PAGE_SIZE % alignment == 0); - /* Sparse buffers must have NO_CPU_ACCESS set. */ - assert(!(flags & RADEON_FLAG_SPARSE) || flags & RADEON_FLAG_NO_CPU_ACCESS); + return amdgpu_bo_sparse_create(ws, size, domain, flags); + } - struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) ? 
- ws->bo_slabs_encrypted : ws->bo_slabs; - struct pb_slabs *last_slab = &slabs[NUM_SLAB_ALLOCATORS - 1]; + struct pb_slabs *last_slab = &ws->bo_slabs[NUM_SLAB_ALLOCATORS - 1]; unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1); + int heap = radeon_get_heap_index(domain, flags); /* Sub-allocate small buffers from slabs. */ - if (!(flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE)) && - size <= max_slab_entry_size) { + if (heap >= 0 && size <= max_slab_entry_size) { struct pb_slab_entry *entry; - int heap = radeon_get_heap_index(domain, flags); - - if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS) - goto no_slab; - unsigned alloc_size = size; /* Always use slabs for sizes less than 4 KB because the kernel aligns @@ -1410,7 +1381,7 @@ amdgpu_bo_create(struct amdgpu_winsys *ws, } } - struct pb_slabs *slabs = get_slabs(ws, alloc_size, flags); + struct pb_slabs *slabs = get_slabs(ws, alloc_size); entry = pb_slab_alloc(slabs, alloc_size, heap); if (!entry) { /* Clean up buffer managers and try again. */ @@ -1435,15 +1406,6 @@ amdgpu_bo_create(struct amdgpu_winsys *ws, } no_slab: - if (flags & RADEON_FLAG_SPARSE) { - assert(RADEON_SPARSE_PAGE_SIZE % alignment == 0); - - return amdgpu_bo_sparse_create(ws, size, domain, flags); - } - - /* This flag is irrelevant for the cache. */ - flags &= ~RADEON_FLAG_NO_SUBALLOC; - /* Align size to page size. This is the minimum alignment for normal * BOs. Aligning this here helps the cached bufmgr. Especially small BOs, * like constant/uniform buffers, can benefit from better and more reuse. @@ -1453,11 +1415,13 @@ no_slab: alignment = align(alignment, ws->info.gart_page_size); } - bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING; + bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING && + !(flags & RADEON_FLAG_DISCARDABLE); if (use_reusable_pool) { - heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_ENCRYPTED); - assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS); + /* RADEON_FLAG_NO_SUBALLOC is irrelevant for the cache. */ + heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_NO_SUBALLOC); + assert(heap >= 0 && heap < RADEON_NUM_HEAPS); /* Get a buffer from the cache. */ bo = (struct amdgpu_winsys_bo*) @@ -1586,7 +1550,8 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws, /* Initialize the structure. */ simple_mtx_init(&bo->lock, mtx_plain); pipe_reference_init(&bo->base.reference, 1); - bo->base.alignment_log2 = util_logbase2(info.phys_alignment); + bo->base.alignment_log2 = util_logbase2(info.phys_alignment ? 
+ info.phys_alignment : ws->info.gart_page_size); bo->bo = result.buf_handle; bo->base.size = result.alloc_size; bo->base.vtbl = &amdgpu_winsys_bo_vtbl; @@ -1697,7 +1662,8 @@ static bool amdgpu_bo_get_handle(struct radeon_winsys *rws, } static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws, - void *pointer, uint64_t size) + void *pointer, uint64_t size, + enum radeon_bo_flag flags) { struct amdgpu_winsys *ws = amdgpu_winsys(rws); amdgpu_bo_handle buf_handle; diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h index 48bce54ec..2bd15af2a 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h @@ -132,12 +132,8 @@ void amdgpu_bo_unmap(struct radeon_winsys *rws, struct pb_buffer *buf); void amdgpu_bo_init_functions(struct amdgpu_screen_winsys *ws); bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry); -struct pb_slab *amdgpu_bo_slab_alloc_encrypted(void *priv, unsigned heap, - unsigned entry_size, - unsigned group_index); -struct pb_slab *amdgpu_bo_slab_alloc_normal(void *priv, unsigned heap, - unsigned entry_size, - unsigned group_index); +struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size, + unsigned group_index); void amdgpu_bo_slab_free(struct amdgpu_winsys *ws, struct pb_slab *slab); static inline diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index fc2340a06..40b8c5850 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -273,11 +273,30 @@ amdgpu_cs_get_next_fence(struct radeon_cmdbuf *rcs) /* CONTEXTS */ -static struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws) +static uint32_t +radeon_to_amdgpu_priority(enum radeon_ctx_priority radeon_priority) +{ + switch (radeon_priority) { + case RADEON_CTX_PRIORITY_REALTIME: + return AMDGPU_CTX_PRIORITY_VERY_HIGH; + case RADEON_CTX_PRIORITY_HIGH: + return AMDGPU_CTX_PRIORITY_HIGH; + case RADEON_CTX_PRIORITY_MEDIUM: + return AMDGPU_CTX_PRIORITY_NORMAL; + case RADEON_CTX_PRIORITY_LOW: + return AMDGPU_CTX_PRIORITY_LOW; + default: + unreachable("Invalid context priority"); + } +} + +static struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws, + enum radeon_ctx_priority priority) { struct amdgpu_ctx *ctx = CALLOC_STRUCT(amdgpu_ctx); int r; struct amdgpu_bo_alloc_request alloc_buffer = {}; + uint32_t amdgpu_priority = radeon_to_amdgpu_priority(priority); amdgpu_bo_handle buf_handle; if (!ctx) @@ -287,9 +306,9 @@ static struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws) ctx->refcount = 1; ctx->initial_num_total_rejected_cs = ctx->ws->num_total_rejected_cs; - r = amdgpu_cs_ctx_create(ctx->ws->dev, &ctx->ctx); + r = amdgpu_cs_ctx_create2(ctx->ws->dev, amdgpu_priority, &ctx->ctx); if (r) { - fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", r); + fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create2 failed. (%i)\n", r); goto error_create; } @@ -389,7 +408,7 @@ amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx, bool full_reset_o if (ctx->ws->num_total_rejected_cs > ctx->initial_num_total_rejected_cs) { if (needs_reset) *needs_reset = true; - return ctx->num_rejected_cs ? PIPE_GUILTY_CONTEXT_RESET : + return ctx->rejected_any_cs ? 
PIPE_GUILTY_CONTEXT_RESET : PIPE_INNOCENT_CONTEXT_RESET; } if (needs_reset) @@ -714,18 +733,20 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, buffer_size = MIN2(buffer_size, max_size); buffer_size = MAX2(buffer_size, min_size); /* min_size is more important */ - enum radeon_bo_domain domain; + /* Use cached GTT for command buffers. Writing to other heaps is very slow on the CPU. + * The speed of writing to GTT WC is somewhere between no difference and very slow, while + * VRAM being very slow a lot more often. + */ + enum radeon_bo_domain domain = RADEON_DOMAIN_GTT; unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING; - if (cs->ring_type == RING_GFX || - cs->ring_type == RING_COMPUTE || - cs->ring_type == RING_DMA) { - domain = ws->info.smart_access_memory ? RADEON_DOMAIN_VRAM : RADEON_DOMAIN_GTT; - flags |= RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC; - } else { - /* UVD/VCE */ - /* TODO: validate that UVD/VCE don't read from IBs and enable WC or even VRAM. */ - domain = RADEON_DOMAIN_GTT; + if (cs->ip_type == AMD_IP_GFX || + cs->ip_type == AMD_IP_COMPUTE || + cs->ip_type == AMD_IP_SDMA) { + /* Avoids hangs with "rendercheck -t cacomposite -f a8r8g8b8" via glamor + * on Navi 14 + */ + flags |= RADEON_FLAG_32BIT; } pb = amdgpu_bo_create(ws, buffer_size, @@ -812,7 +833,8 @@ static void amdgpu_set_ib_size(struct radeon_cmdbuf *rcs, struct amdgpu_ib *ib) { if (ib->ptr_ib_size_inside_ib) { *ib->ptr_ib_size = rcs->current.cdw | - S_3F2_CHAIN(1) | S_3F2_VALID(1); + S_3F2_CHAIN(1) | S_3F2_VALID(1) | + S_3F2_PRE_ENA(((struct amdgpu_cs*)ib)->preamble_ib_bo != NULL); } else { *ib->ptr_ib_size = rcs->current.cdw; } @@ -829,40 +851,40 @@ static void amdgpu_ib_finalize(struct amdgpu_winsys *ws, struct radeon_cmdbuf *r static bool amdgpu_init_cs_context(struct amdgpu_winsys *ws, struct amdgpu_cs_context *cs, - enum ring_type ring_type) + enum amd_ip_type ip_type) { - switch (ring_type) { - case RING_DMA: + switch (ip_type) { + case AMD_IP_SDMA: cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_DMA; break; - case RING_UVD: + case AMD_IP_UVD: cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD; break; - case RING_UVD_ENC: + case AMD_IP_UVD_ENC: cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD_ENC; break; - case RING_VCE: + case AMD_IP_VCE: cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCE; break; - case RING_VCN_DEC: + case AMD_IP_VCN_DEC: cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_DEC; break; - case RING_VCN_ENC: + case AMD_IP_VCN_ENC: cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_ENC; break; - case RING_VCN_JPEG: + case AMD_IP_VCN_JPEG: cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_JPEG; break; - case RING_COMPUTE: - case RING_GFX: - cs->ib[IB_MAIN].ip_type = ring_type == RING_GFX ? AMDGPU_HW_IP_GFX : + case AMD_IP_COMPUTE: + case AMD_IP_GFX: + cs->ib[IB_MAIN].ip_type = ip_type == AMD_IP_GFX ? AMDGPU_HW_IP_GFX : AMDGPU_HW_IP_COMPUTE; /* The kernel shouldn't invalidate L2 and vL1. The proper place for cache @@ -873,14 +895,19 @@ static bool amdgpu_init_cs_context(struct amdgpu_winsys *ws, * the next IB starts drawing, and so the cache flush at the end of IB * is always late. 
*/ - if (ws->info.drm_minor >= 26) + if (ws->info.drm_minor >= 26) { + cs->ib[IB_PREAMBLE].flags = AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE; cs->ib[IB_MAIN].flags = AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE; + } break; default: assert(0); } + cs->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAG_PREAMBLE; + cs->ib[IB_PREAMBLE].ip_type = cs->ib[IB_MAIN].ip_type; + cs->last_added_bo = NULL; return true; } @@ -931,11 +958,11 @@ static void amdgpu_destroy_cs_context(struct amdgpu_winsys *ws, struct amdgpu_cs static bool amdgpu_cs_create(struct radeon_cmdbuf *rcs, struct radeon_winsys_ctx *rwctx, - enum ring_type ring_type, + enum amd_ip_type ip_type, void (*flush)(void *ctx, unsigned flags, struct pipe_fence_handle **fence), void *flush_ctx, - bool stop_exec_on_failure) + bool allow_context_lost) { struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx; struct amdgpu_cs *cs; @@ -951,25 +978,25 @@ amdgpu_cs_create(struct radeon_cmdbuf *rcs, cs->ctx = ctx; cs->flush_cs = flush; cs->flush_data = flush_ctx; - cs->ring_type = ring_type; - cs->stop_exec_on_failure = stop_exec_on_failure; + cs->ip_type = ip_type; + cs->allow_context_lost = allow_context_lost; cs->noop = ctx->ws->noop_cs; - cs->has_chaining = ctx->ws->info.chip_class >= GFX7 && - (ring_type == RING_GFX || ring_type == RING_COMPUTE); + cs->has_chaining = ctx->ws->info.gfx_level >= GFX7 && + (ip_type == AMD_IP_GFX || ip_type == AMD_IP_COMPUTE); struct amdgpu_cs_fence_info fence_info; fence_info.handle = cs->ctx->user_fence_bo; - fence_info.offset = cs->ring_type * 4; + fence_info.offset = cs->ip_type * 4; amdgpu_cs_chunk_fence_info_to_data(&fence_info, (void*)&cs->fence_chunk); cs->main.ib_type = IB_MAIN; - if (!amdgpu_init_cs_context(ctx->ws, &cs->csc1, ring_type)) { + if (!amdgpu_init_cs_context(ctx->ws, &cs->csc1, ip_type)) { FREE(cs); return false; } - if (!amdgpu_init_cs_context(ctx->ws, &cs->csc2, ring_type)) { + if (!amdgpu_init_cs_context(ctx->ws, &cs->csc2, ip_type)) { amdgpu_destroy_cs_context(ctx->ws, &cs->csc1); FREE(cs); return false; @@ -1003,6 +1030,13 @@ amdgpu_cs_create(struct radeon_cmdbuf *rcs, return true; } +static void amdgpu_cs_set_preamble(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib, + unsigned preamble_num_dw, bool preamble_changed) +{ + /* TODO: implement this properly */ + radeon_emit_array(cs, preamble_ib, preamble_num_dw); +} + static bool amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib, unsigned preamble_num_dw) @@ -1034,14 +1068,12 @@ amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_i memcpy(map, preamble_ib, preamble_num_dw * 4); /* Pad the IB. */ - uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ring_type]; + uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ip_type]; while (preamble_num_dw & ib_pad_dw_mask) map[preamble_num_dw++] = PKT3_NOP_PAD; amdgpu_bo_unmap(&ws->dummy_ws.base, preamble_bo); for (unsigned i = 0; i < 2; i++) { - csc[i]->ib[IB_PREAMBLE] = csc[i]->ib[IB_MAIN]; - csc[i]->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAG_PREAMBLE; csc[i]->ib[IB_PREAMBLE].va_start = amdgpu_winsys_bo(preamble_bo)->va; csc[i]->ib[IB_PREAMBLE].ib_bytes = preamble_num_dw * 4; @@ -1112,7 +1144,7 @@ static bool amdgpu_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw) rcs->current.max_dw += cs_epilog_dw; /* Pad with NOPs but leave 4 dwords for INDIRECT_BUFFER. 
*/ - uint32_t ib_pad_dw_mask = cs->ws->info.ib_pad_dw_mask[cs->ring_type]; + uint32_t ib_pad_dw_mask = cs->ws->info.ib_pad_dw_mask[cs->ip_type]; while ((rcs->current.cdw & ib_pad_dw_mask) != ib_pad_dw_mask - 3) radeon_emit(rcs, PKT3_NOP_PAD); @@ -1194,8 +1226,8 @@ static bool is_noop_fence_dependency(struct amdgpu_cs *acs, * We always want no dependency between back-to-back gfx IBs, because * we need the parallelism between IBs for good performance. */ - if ((acs->ring_type == RING_GFX || - acs->ws->info.num_rings[acs->ring_type] == 1) && + if ((acs->ip_type == AMD_IP_GFX || + acs->ws->info.ip[acs->ip_type].num_queues == 1) && !amdgpu_fence_is_syncobj(fence) && fence->ctx == acs->ctx && fence->fence.ip_type == cs->ib[IB_MAIN].ip_type) @@ -1455,163 +1487,164 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index) } } - if (acs->ring_type == RING_GFX) + if (acs->ip_type == AMD_IP_GFX) ws->gfx_bo_list_counter += cs->num_real_buffers; - bool noop = false; + struct drm_amdgpu_cs_chunk chunks[7]; + unsigned num_chunks = 0; - if (acs->stop_exec_on_failure && acs->ctx->num_rejected_cs) { - r = -ECANCELED; - } else { - struct drm_amdgpu_cs_chunk chunks[7]; - unsigned num_chunks = 0; - - /* BO list */ - if (!use_bo_list_create) { - chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES; - chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_bo_list_in) / 4; - chunks[num_chunks].chunk_data = (uintptr_t)&bo_list_in; - num_chunks++; - } - - /* Fence dependencies. */ - unsigned num_dependencies = cs->fence_dependencies.num; - if (num_dependencies) { - struct drm_amdgpu_cs_chunk_dep *dep_chunk = - alloca(num_dependencies * sizeof(*dep_chunk)); + /* BO list */ + if (!use_bo_list_create) { + chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES; + chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_bo_list_in) / 4; + chunks[num_chunks].chunk_data = (uintptr_t)&bo_list_in; + num_chunks++; + } - for (unsigned i = 0; i < num_dependencies; i++) { - struct amdgpu_fence *fence = - (struct amdgpu_fence*)cs->fence_dependencies.list[i]; + /* Fence dependencies. */ + unsigned num_dependencies = cs->fence_dependencies.num; + if (num_dependencies) { + struct drm_amdgpu_cs_chunk_dep *dep_chunk = + alloca(num_dependencies * sizeof(*dep_chunk)); - assert(util_queue_fence_is_signalled(&fence->submitted)); - amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]); - } + for (unsigned i = 0; i < num_dependencies; i++) { + struct amdgpu_fence *fence = + (struct amdgpu_fence*)cs->fence_dependencies.list[i]; - chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES; - chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num_dependencies; - chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk; - num_chunks++; + assert(util_queue_fence_is_signalled(&fence->submitted)); + amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]); } - /* Syncobj dependencies. */ - unsigned num_syncobj_dependencies = cs->syncobj_dependencies.num; - if (num_syncobj_dependencies) { - struct drm_amdgpu_cs_chunk_sem *sem_chunk = - alloca(num_syncobj_dependencies * sizeof(sem_chunk[0])); + chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES; + chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num_dependencies; + chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk; + num_chunks++; + } - for (unsigned i = 0; i < num_syncobj_dependencies; i++) { - struct amdgpu_fence *fence = - (struct amdgpu_fence*)cs->syncobj_dependencies.list[i]; + /* Syncobj dependencies. 
*/ + unsigned num_syncobj_dependencies = cs->syncobj_dependencies.num; + if (num_syncobj_dependencies) { + struct drm_amdgpu_cs_chunk_sem *sem_chunk = + alloca(num_syncobj_dependencies * sizeof(sem_chunk[0])); - if (!amdgpu_fence_is_syncobj(fence)) - continue; + for (unsigned i = 0; i < num_syncobj_dependencies; i++) { + struct amdgpu_fence *fence = + (struct amdgpu_fence*)cs->syncobj_dependencies.list[i]; - assert(util_queue_fence_is_signalled(&fence->submitted)); - sem_chunk[i].handle = fence->syncobj; - } + if (!amdgpu_fence_is_syncobj(fence)) + continue; - chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN; - chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4 * num_syncobj_dependencies; - chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk; - num_chunks++; + assert(util_queue_fence_is_signalled(&fence->submitted)); + sem_chunk[i].handle = fence->syncobj; } - /* Syncobj signals. */ - unsigned num_syncobj_to_signal = cs->syncobj_to_signal.num; - if (num_syncobj_to_signal) { - struct drm_amdgpu_cs_chunk_sem *sem_chunk = - alloca(num_syncobj_to_signal * sizeof(sem_chunk[0])); + chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN; + chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4 * num_syncobj_dependencies; + chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk; + num_chunks++; + } - for (unsigned i = 0; i < num_syncobj_to_signal; i++) { - struct amdgpu_fence *fence = - (struct amdgpu_fence*)cs->syncobj_to_signal.list[i]; + /* Syncobj signals. */ + unsigned num_syncobj_to_signal = cs->syncobj_to_signal.num; + if (num_syncobj_to_signal) { + struct drm_amdgpu_cs_chunk_sem *sem_chunk = + alloca(num_syncobj_to_signal * sizeof(sem_chunk[0])); - assert(amdgpu_fence_is_syncobj(fence)); - sem_chunk[i].handle = fence->syncobj; - } + for (unsigned i = 0; i < num_syncobj_to_signal; i++) { + struct amdgpu_fence *fence = + (struct amdgpu_fence*)cs->syncobj_to_signal.list[i]; - chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT; - chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4 - * num_syncobj_to_signal; - chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk; - num_chunks++; + assert(amdgpu_fence_is_syncobj(fence)); + sem_chunk[i].handle = fence->syncobj; } - /* Fence */ - if (has_user_fence) { - chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_FENCE; - chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4; - chunks[num_chunks].chunk_data = (uintptr_t)&acs->fence_chunk; - num_chunks++; - } + chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT; + chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4 + * num_syncobj_to_signal; + chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk; + num_chunks++; + } - /* IB */ - if (cs->ib[IB_PREAMBLE].ib_bytes) { - chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB; - chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4; - chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_PREAMBLE]; - num_chunks++; - } + /* Fence */ + if (has_user_fence) { + chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_FENCE; + chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4; + chunks[num_chunks].chunk_data = (uintptr_t)&acs->fence_chunk; + num_chunks++; + } - /* IB */ - cs->ib[IB_MAIN].ib_bytes *= 4; /* Convert from dwords to bytes. 
*/ + /* IB */ + if (cs->ib[IB_PREAMBLE].ib_bytes) { chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB; chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4; - chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_MAIN]; + chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_PREAMBLE]; num_chunks++; + } - if (cs->secure) { - cs->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAGS_SECURE; - cs->ib[IB_MAIN].flags |= AMDGPU_IB_FLAGS_SECURE; - } else { - cs->ib[IB_PREAMBLE].flags &= ~AMDGPU_IB_FLAGS_SECURE; - cs->ib[IB_MAIN].flags &= ~AMDGPU_IB_FLAGS_SECURE; - } + /* IB */ + cs->ib[IB_MAIN].ib_bytes *= 4; /* Convert from dwords to bytes. */ + chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB; + chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4; + chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_MAIN]; + num_chunks++; - /* Apply RADEON_NOOP. */ - if (acs->noop) { - if (acs->ring_type == RING_GFX) { - /* Reduce the IB size and fill it with NOP to make it like an empty IB. */ - unsigned noop_size = MIN2(cs->ib[IB_MAIN].ib_bytes, ws->info.ib_alignment); + if (cs->secure) { + cs->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAGS_SECURE; + cs->ib[IB_MAIN].flags |= AMDGPU_IB_FLAGS_SECURE; + } else { + cs->ib[IB_PREAMBLE].flags &= ~AMDGPU_IB_FLAGS_SECURE; + cs->ib[IB_MAIN].flags &= ~AMDGPU_IB_FLAGS_SECURE; + } - cs->ib_main_addr[0] = PKT3(PKT3_NOP, noop_size / 4 - 2, 0); - cs->ib[IB_MAIN].ib_bytes = noop_size; - } else { - noop = true; - } - } + bool noop = acs->noop; - assert(num_chunks <= ARRAY_SIZE(chunks)); + if (noop && acs->ip_type == AMD_IP_GFX) { + /* Reduce the IB size and fill it with NOP to make it like an empty IB. */ + unsigned noop_size = MIN2(cs->ib[IB_MAIN].ib_bytes, ws->info.ib_alignment); - r = noop ? 0 : amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list, - num_chunks, chunks, &seq_no); + cs->ib_main_addr[0] = PKT3(PKT3_NOP, noop_size / 4 - 2, 0); + cs->ib[IB_MAIN].ib_bytes = noop_size; + noop = false; } - if (r) { - if (r == -ENOMEM) - fprintf(stderr, "amdgpu: Not enough memory for command submission.\n"); - else if (r == -ECANCELED) - fprintf(stderr, "amdgpu: The CS has been cancelled because the context is lost.\n"); - else - fprintf(stderr, "amdgpu: The CS has been rejected, " - "see dmesg for more information (%i).\n", r); - - acs->ctx->num_rejected_cs++; - ws->num_total_rejected_cs++; - } else if (!noop) { - /* Success. */ - uint64_t *user_fence = NULL; - - /* Need to reserve 4 QWORD for user fence: - * QWORD[0]: completed fence - * QWORD[1]: preempted fence - * QWORD[2]: reset fence - * QWORD[3]: preempted then reset - **/ - if (has_user_fence) - user_fence = acs->ctx->user_fence_cpu_address_base + acs->ring_type * 4; - amdgpu_fence_submitted(cs->fence, seq_no, user_fence); + assert(num_chunks <= ARRAY_SIZE(chunks)); + + if (unlikely(acs->ctx->rejected_any_cs)) { + r = -ECANCELED; + } else if (unlikely(noop)) { + r = 0; + } else { + /* Submit the command buffer. + * + * The kernel returns -ENOMEM with many parallel processes using GDS such as test suites + * quite often, but it eventually succeeds after enough attempts. This happens frequently + * with dEQP using NGG streamout. + */ + r = 0; + + do { + /* Wait 1 ms and try again. */ + if (r == -ENOMEM) + os_time_sleep(1000); + + r = amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list, + num_chunks, chunks, &seq_no); + } while (r == -ENOMEM); + + if (!r) { + /* Success. 
*/ + uint64_t *user_fence = NULL; + + /* Need to reserve 4 QWORD for user fence: + * QWORD[0]: completed fence + * QWORD[1]: preempted fence + * QWORD[2]: reset fence + * QWORD[3]: preempted then reset + */ + if (has_user_fence) + user_fence = acs->ctx->user_fence_cpu_address_base + acs->ip_type * 4; + amdgpu_fence_submitted(cs->fence, seq_no, user_fence); + } } /* Cleanup. */ @@ -1619,6 +1652,23 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index) amdgpu_bo_list_destroy_raw(ws->dev, bo_list); cleanup: + if (unlikely(r)) { + if (!acs->allow_context_lost) { + /* Non-robust contexts are allowed to terminate the process. The only alternative is + * to skip command submission, which would look like a freeze because nothing is drawn, + * which is not a useful state to be in under any circumstances. + */ + fprintf(stderr, "amdgpu: The CS has been rejected (%i), but the context isn't robust.\n", r); + fprintf(stderr, "amdgpu: The process will be terminated.\n"); + exit(1); + } + + fprintf(stderr, "amdgpu: The CS has been rejected (%i). Recreate the context.\n", r); + if (!acs->ctx->rejected_any_cs) + ws->num_total_rejected_cs++; + acs->ctx->rejected_any_cs = true; + } + /* If there was an error, signal the fence, because it won't be signalled * by the hardware. */ if (r || noop) @@ -1653,14 +1703,14 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, struct amdgpu_cs *cs = amdgpu_cs(rcs); struct amdgpu_winsys *ws = cs->ws; int error_code = 0; - uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ring_type]; + uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ip_type]; rcs->current.max_dw += amdgpu_cs_epilog_dws(cs); /* Pad the IB according to the mask. */ - switch (cs->ring_type) { - case RING_DMA: - if (ws->info.chip_class <= GFX6) { + switch (cs->ip_type) { + case AMD_IP_SDMA: + if (ws->info.gfx_level <= GFX6) { while (rcs->current.cdw & ib_pad_dw_mask) radeon_emit(rcs, 0xf0000000); /* NOP packet */ } else { @@ -1668,8 +1718,8 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, radeon_emit(rcs, SDMA_NOP_PAD); } break; - case RING_GFX: - case RING_COMPUTE: + case AMD_IP_GFX: + case AMD_IP_COMPUTE: if (ws->info.gfx_ib_pad_with_type2) { while (rcs->current.cdw & ib_pad_dw_mask) radeon_emit(rcs, PKT2_NOP_PAD); @@ -1677,15 +1727,15 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, while (rcs->current.cdw & ib_pad_dw_mask) radeon_emit(rcs, PKT3_NOP_PAD); } - if (cs->ring_type == RING_GFX) + if (cs->ip_type == AMD_IP_GFX) ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4; break; - case RING_UVD: - case RING_UVD_ENC: + case AMD_IP_UVD: + case AMD_IP_UVD_ENC: while (rcs->current.cdw & ib_pad_dw_mask) radeon_emit(rcs, 0x80000000); /* type2 nop packet */ break; - case RING_VCN_JPEG: + case AMD_IP_VCN_JPEG: if (rcs->current.cdw % 2) assert(0); while (rcs->current.cdw & ib_pad_dw_mask) { @@ -1693,7 +1743,7 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, radeon_emit(rcs, 0x00000000); } break; - case RING_VCN_DEC: + case AMD_IP_VCN_DEC: while (rcs->current.cdw & ib_pad_dw_mask) radeon_emit(rcs, 0x81ff); /* nop packet */ break; @@ -1768,9 +1818,9 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, rcs->used_gart_kb = 0; rcs->used_vram_kb = 0; - if (cs->ring_type == RING_GFX) + if (cs->ip_type == AMD_IP_GFX) ws->num_gfx_IBs++; - else if (cs->ring_type == RING_DMA) + else if (cs->ip_type == AMD_IP_SDMA) ws->num_sdma_IBs++; return error_code; @@ -1811,6 +1861,7 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws) 
ws->base.ctx_destroy = amdgpu_ctx_destroy; ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status; ws->base.cs_create = amdgpu_cs_create; + ws->base.cs_set_preamble = amdgpu_cs_set_preamble; ws->base.cs_setup_preemption = amdgpu_cs_setup_preemption; ws->base.cs_destroy = amdgpu_cs_destroy; ws->base.cs_add_buffer = amdgpu_cs_add_buffer; diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index 794d13bd0..13b8bf73d 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -45,7 +45,7 @@ struct amdgpu_ctx { uint64_t *user_fence_cpu_address_base; int refcount; unsigned initial_num_total_rejected_cs; - unsigned num_rejected_cs; + bool rejected_any_cs; }; struct amdgpu_cs_buffer { @@ -131,7 +131,7 @@ struct amdgpu_cs { struct amdgpu_ib main; /* must be first because this is inherited */ struct amdgpu_winsys *ws; struct amdgpu_ctx *ctx; - enum ring_type ring_type; + enum amd_ip_type ip_type; struct drm_amdgpu_cs_chunk_fence fence_chunk; /* We flip between these two CS. While one is being consumed @@ -154,7 +154,7 @@ struct amdgpu_cs { /* Flush CS. */ void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence); void *flush_data; - bool stop_exec_on_failure; + bool allow_context_lost; bool noop; bool has_chaining; diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_public.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_public.h deleted file mode 100644 index f403ed997..000000000 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_public.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright © 2015 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- */ - -#ifndef AMDGPU_PUBLIC_H -#define AMDGPU_PUBLIC_H - -#include "pipe/p_defines.h" -#include "gallium/winsys/radeon/drm/radeon_drm_public.h" - -struct radeon_winsys; -struct pipe_screen; -struct pipe_screen_config; - -struct radeon_winsys * -amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, - radeon_screen_create_t screen_create); - -#endif diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index cc71b0e92..05ff784d3 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -28,7 +28,6 @@ */ #include "amdgpu_cs.h" -#include "amdgpu_public.h" #include "util/os_file.h" #include "util/os_misc.h" @@ -45,7 +44,7 @@ #include "sid.h" static struct hash_table *dev_tab = NULL; -static simple_mtx_t dev_tab_mutex = _SIMPLE_MTX_INITIALIZER_NP; +static simple_mtx_t dev_tab_mutex = SIMPLE_MTX_INITIALIZER; #if DEBUG DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false) @@ -61,23 +60,25 @@ static void handle_env_var_force_family(struct amdgpu_winsys *ws) for (i = CHIP_TAHITI; i < CHIP_LAST; i++) { if (!strcmp(family, ac_get_llvm_processor_name(i))) { - /* Override family and chip_class. */ + /* Override family and gfx_level. */ ws->info.family = i; ws->info.name = "NOOP"; strcpy(ws->info.lowercase_name , "noop"); - if (i >= CHIP_SIENNA_CICHLID) - ws->info.chip_class = GFX10_3; + if (i >= CHIP_GFX1100) + ws->info.gfx_level = GFX11; + else if (i >= CHIP_NAVI21) + ws->info.gfx_level = GFX10_3; else if (i >= CHIP_NAVI10) - ws->info.chip_class = GFX10; + ws->info.gfx_level = GFX10; else if (i >= CHIP_VEGA10) - ws->info.chip_class = GFX9; + ws->info.gfx_level = GFX9; else if (i >= CHIP_TONGA) - ws->info.chip_class = GFX8; + ws->info.gfx_level = GFX8; else if (i >= CHIP_BONAIRE) - ws->info.chip_class = GFX7; + ws->info.gfx_level = GFX7; else - ws->info.chip_class = GFX6; + ws->info.gfx_level = GFX6; /* Don't submit any IBs. */ setenv("RADEON_NOOP", "1", 1); @@ -94,7 +95,7 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, const struct pipe_screen_config *config, int fd) { - if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo)) + if (!ac_query_gpu_info(fd, ws->dev, &ws->info)) goto fail; /* TODO: Enable this once the kernel handles it efficiently. 
*/ @@ -362,6 +363,34 @@ static bool amdgpu_cs_is_secure(struct radeon_cmdbuf *rcs) return cs->csc->secure; } +static uint32_t +radeon_to_amdgpu_pstate(enum radeon_ctx_pstate pstate) +{ + switch (pstate) { + case RADEON_CTX_PSTATE_NONE: + return AMDGPU_CTX_STABLE_PSTATE_NONE; + case RADEON_CTX_PSTATE_STANDARD: + return AMDGPU_CTX_STABLE_PSTATE_STANDARD; + case RADEON_CTX_PSTATE_MIN_SCLK: + return AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK; + case RADEON_CTX_PSTATE_MIN_MCLK: + return AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK; + case RADEON_CTX_PSTATE_PEAK: + return AMDGPU_CTX_STABLE_PSTATE_PEAK; + default: + unreachable("Invalid pstate"); + } +} + +static bool +amdgpu_cs_set_pstate(struct radeon_cmdbuf *rcs, enum radeon_ctx_pstate pstate) +{ + struct amdgpu_cs *cs = amdgpu_cs(rcs); + uint32_t amdgpu_pstate = radeon_to_amdgpu_pstate(pstate); + return amdgpu_cs_ctx_stable_pstate(cs->ctx->ctx, + AMDGPU_CTX_OP_SET_STABLE_PSTATE, amdgpu_pstate, NULL) == 0; +} + PUBLIC struct radeon_winsys * amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, radeon_screen_create_t screen_create) @@ -450,9 +479,9 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, goto fail_alloc; /* Create managers. */ - pb_cache_init(&aws->bo_cache, RADEON_MAX_CACHED_HEAPS, + pb_cache_init(&aws->bo_cache, RADEON_NUM_HEAPS, 500000, aws->check_vm ? 1.0f : 2.0f, 0, - (aws->info.vram_size + aws->info.gart_size) / 8, aws, + ((uint64_t)aws->info.vram_size_kb + aws->info.gart_size_kb) * 1024 / 8, aws, /* Cast to void* because one of the function parameters * is a struct pointer instead of void*. */ (void*)amdgpu_bo_destroy, (void*)amdgpu_bo_can_reclaim); @@ -470,25 +499,10 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, if (!pb_slabs_init(&aws->bo_slabs[i], min_order, max_order, - RADEON_MAX_SLAB_HEAPS, true, - aws, - amdgpu_bo_can_reclaim_slab, - amdgpu_bo_slab_alloc_normal, - /* Cast to void* because one of the function parameters - * is a struct pointer instead of void*. */ - (void*)amdgpu_bo_slab_free)) { - amdgpu_winsys_destroy(&ws->base); - simple_mtx_unlock(&dev_tab_mutex); - return NULL; - } - - if (aws->info.has_tmz_support && - !pb_slabs_init(&aws->bo_slabs_encrypted[i], - min_order, max_order, - RADEON_MAX_SLAB_HEAPS, true, + RADEON_NUM_HEAPS, true, aws, amdgpu_bo_can_reclaim_slab, - amdgpu_bo_slab_alloc_encrypted, + amdgpu_bo_slab_alloc, /* Cast to void* because one of the function parameters * is a struct pointer instead of void*. */ (void*)amdgpu_bo_slab_free)) { @@ -546,6 +560,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, ws->base.read_registers = amdgpu_read_registers; ws->base.pin_threads_to_L3_cache = amdgpu_pin_threads_to_L3_cache; ws->base.cs_is_secure = amdgpu_cs_is_secure; + ws->base.cs_set_pstate = amdgpu_cs_set_pstate; amdgpu_bo_init_functions(ws); amdgpu_cs_init_functions(ws); diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h index 26e81f94d..1bff953a1 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h @@ -30,7 +30,7 @@ #include "pipebuffer/pb_cache.h" #include "pipebuffer/pb_slab.h" -#include "gallium/drivers/radeon/radeon_winsys.h" +#include "winsys/radeon_winsys.h" #include "util/simple_mtx.h" #include "util/u_queue.h" #include <amdgpu.h> @@ -64,7 +64,6 @@ struct amdgpu_winsys { * need to layer the allocators, so that we don't waste too much memory. 
*/ struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS]; - struct pb_slabs bo_slabs_encrypted[NUM_SLAB_ALLOCATORS]; amdgpu_device_handle dev; @@ -93,7 +92,6 @@ struct amdgpu_winsys { /* multithreaded IB submission */ struct util_queue cs_queue; - struct amdgpu_gpu_info amdinfo; struct ac_addrlib *addrlib; bool check_vm; |
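The submission-failure handling added in the amdgpu_cs.c hunk above (retry on -ENOMEM, sticky rejected_any_cs flag, process termination for non-robust contexts) can be reduced to the self-contained sketch below. submit_raw() is a hypothetical stub standing in for amdgpu_cs_submit_raw2(), and the structure is simplified relative to amdgpu_cs_submit_ib(); the retry and robust-context behaviour mirror the comments in the diff.

```c
/*
 * Simplified sketch of the new submission-failure path: -ENOMEM is retried
 * after a 1 ms sleep, any other failure marks the context as rejected, and
 * non-robust contexts terminate the process instead of silently freezing.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct ctx {
   bool allow_context_lost;  /* set for robust contexts */
   bool rejected_any_cs;     /* sticky "context is lost" flag */
};

/* Stub standing in for the real kernel submission call. */
static int submit_raw(void)
{
   static int transient_enomem = 2;
   if (transient_enomem-- > 0)
      return -ENOMEM;   /* pretend the kernel is temporarily out of memory */
   return 0;
}

static int submit_with_retry(struct ctx *ctx)
{
   int r;

   if (ctx->rejected_any_cs)
      return -ECANCELED;       /* context already lost: skip submission */

   do {
      r = submit_raw();
      if (r == -ENOMEM)
         usleep(1000);         /* wait 1 ms and try again */
   } while (r == -ENOMEM);

   if (r) {
      if (!ctx->allow_context_lost) {
         fprintf(stderr, "CS rejected (%d) on a non-robust context; aborting.\n", r);
         exit(1);
      }
      fprintf(stderr, "CS rejected (%d); the context must be recreated.\n", r);
      ctx->rejected_any_cs = true;
   }
   return r;
}

int main(void)
{
   struct ctx ctx = { .allow_context_lost = true, .rejected_any_cs = false };
   printf("submit returned %d\n", submit_with_retry(&ctx));
   return 0;
}
```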