summary | refs | log | tree | commit | diff
path: root/lib/mesa/src/gallium/winsys/amdgpu/drm
diff options
context:
space:
mode:
author: Jonathan Gray <jsg@cvs.openbsd.org>, 2022-09-02 05:47:02 +0000
committer: Jonathan Gray <jsg@cvs.openbsd.org>, 2022-09-02 05:47:02 +0000
commit: 0dbbf1e0708df85a357d70e2708c0a11aeb5480e (patch)
tree: 6656ff8eb8b15a2fc1c02888973caf618388cfd0 /lib/mesa/src/gallium/winsys/amdgpu/drm
parent: 5f66494d31f735486b8222ecfa0a0c9046e92543 (diff)
Merge Mesa 22.1.7
Diffstat (limited to 'lib/mesa/src/gallium/winsys/amdgpu/drm')
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 12
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 214
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 19
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_public.h | 4
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 2
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 21
6 files changed, 131 insertions, 141 deletions
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index d2c1f3514..bcba77827 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -49,7 +49,7 @@ struct amdgpu_sparse_backing_chunk {
static bool amdgpu_bo_wait(struct radeon_winsys *rws,
struct pb_buffer *_buf, uint64_t timeout,
- enum radeon_bo_usage usage)
+ unsigned usage)
{
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
@@ -1494,7 +1494,8 @@ amdgpu_buffer_create(struct radeon_winsys *ws,
static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
struct winsys_handle *whandle,
- unsigned vm_alignment)
+ unsigned vm_alignment,
+ bool is_prime_linear_buffer)
{
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
struct amdgpu_winsys_bo *bo = NULL;
@@ -1557,7 +1558,11 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
if (!bo)
goto error;
- r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
+ r = amdgpu_bo_va_op_raw(ws->dev, result.buf_handle, 0, result.alloc_size, va,
+ AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
+ AMDGPU_VM_PAGE_EXECUTABLE |
+ (is_prime_linear_buffer ? AMDGPU_VM_MTYPE_UC : 0),
+ AMDGPU_VA_OP_MAP);
if (r)
goto error;
@@ -1575,6 +1580,7 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
* if it can be used for scanout.
*/
flags |= RADEON_FLAG_ENCRYPTED;
+ *((bool*)&rws->uses_secure_bos) = true;
}
/* Initialize the structure. */
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 6452d2ba4..fc2340a06 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -36,8 +36,7 @@
/* FENCES */
static struct pipe_fence_handle *
-amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
- unsigned ip_instance, unsigned ring)
+amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type)
{
struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);
@@ -46,8 +45,6 @@ amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
fence->ctx = ctx;
fence->fence.context = ctx->ctx;
fence->fence.ip_type = ip_type;
- fence->fence.ip_instance = ip_instance;
- fence->fence.ring = ring;
util_queue_fence_init(&fence->submitted);
util_queue_fence_reset(&fence->submitted);
p_atomic_inc(&ctx->refcount);
@@ -266,9 +263,7 @@ amdgpu_cs_get_next_fence(struct radeon_cmdbuf *rcs)
}
fence = amdgpu_fence_create(cs->ctx,
- cs->csc->ib[IB_MAIN].ip_type,
- cs->csc->ib[IB_MAIN].ip_instance,
- cs->csc->ib[IB_MAIN].ring);
+ cs->csc->ib[IB_MAIN].ip_type);
if (!fence)
return NULL;
@@ -471,7 +466,7 @@ int amdgpu_lookup_buffer_any_type(struct amdgpu_cs_context *cs, struct amdgpu_wi
}
static int
-amdgpu_do_add_real_buffer(struct amdgpu_winsys *ws, struct amdgpu_cs_context *cs,
+amdgpu_do_add_real_buffer(struct amdgpu_cs_context *cs,
struct amdgpu_winsys_bo *bo)
{
struct amdgpu_cs_buffer *buffer;
@@ -503,24 +498,23 @@ amdgpu_do_add_real_buffer(struct amdgpu_winsys *ws, struct amdgpu_cs_context *cs
buffer = &cs->real_buffers[idx];
memset(buffer, 0, sizeof(*buffer));
- amdgpu_winsys_bo_reference(ws, &buffer->bo, bo);
+ amdgpu_winsys_bo_reference(cs->ws, &buffer->bo, bo);
cs->num_real_buffers++;
return idx;
}
static int
-amdgpu_lookup_or_add_real_buffer(struct radeon_cmdbuf *rcs, struct amdgpu_cs *acs,
+amdgpu_lookup_or_add_real_buffer(struct radeon_cmdbuf *rcs, struct amdgpu_cs_context *cs,
struct amdgpu_winsys_bo *bo)
{
- struct amdgpu_cs_context *cs = acs->csc;
unsigned hash;
int idx = amdgpu_lookup_buffer(cs, bo, cs->real_buffers, cs->num_real_buffers);
if (idx >= 0)
return idx;
- idx = amdgpu_do_add_real_buffer(acs->ws, cs, bo);
+ idx = amdgpu_do_add_real_buffer(cs, bo);
hash = bo->unique_id & (BUFFER_HASHLIST_SIZE-1);
cs->buffer_indices_hashlist[hash] = idx & 0x7fff;
@@ -533,12 +527,10 @@ amdgpu_lookup_or_add_real_buffer(struct radeon_cmdbuf *rcs, struct amdgpu_cs *ac
return idx;
}
-static int amdgpu_lookup_or_add_slab_buffer(struct amdgpu_winsys *ws,
- struct radeon_cmdbuf *rcs,
- struct amdgpu_cs *acs,
+static int amdgpu_lookup_or_add_slab_buffer(struct radeon_cmdbuf *rcs,
+ struct amdgpu_cs_context *cs,
struct amdgpu_winsys_bo *bo)
{
- struct amdgpu_cs_context *cs = acs->csc;
struct amdgpu_cs_buffer *buffer;
unsigned hash;
int idx = amdgpu_lookup_buffer(cs, bo, cs->slab_buffers, cs->num_slab_buffers);
@@ -547,7 +539,7 @@ static int amdgpu_lookup_or_add_slab_buffer(struct amdgpu_winsys *ws,
if (idx >= 0)
return idx;
- real_idx = amdgpu_lookup_or_add_real_buffer(rcs, acs, bo->u.slab.real);
+ real_idx = amdgpu_lookup_or_add_real_buffer(rcs, cs, bo->u.slab.real);
if (real_idx < 0)
return -1;
@@ -573,8 +565,8 @@ static int amdgpu_lookup_or_add_slab_buffer(struct amdgpu_winsys *ws,
buffer = &cs->slab_buffers[idx];
memset(buffer, 0, sizeof(*buffer));
- amdgpu_winsys_bo_reference(ws, &buffer->bo, bo);
- buffer->u.slab.real_idx = real_idx;
+ amdgpu_winsys_bo_reference(cs->ws, &buffer->bo, bo);
+ buffer->slab_real_idx = real_idx;
cs->num_slab_buffers++;
hash = bo->unique_id & (BUFFER_HASHLIST_SIZE-1);
@@ -583,12 +575,10 @@ static int amdgpu_lookup_or_add_slab_buffer(struct amdgpu_winsys *ws,
return idx;
}
-static int amdgpu_lookup_or_add_sparse_buffer(struct amdgpu_winsys *ws,
- struct radeon_cmdbuf *rcs,
- struct amdgpu_cs *acs,
+static int amdgpu_lookup_or_add_sparse_buffer(struct radeon_cmdbuf *rcs,
+ struct amdgpu_cs_context *cs,
struct amdgpu_winsys_bo *bo)
{
- struct amdgpu_cs_context *cs = acs->csc;
struct amdgpu_cs_buffer *buffer;
unsigned hash;
int idx = amdgpu_lookup_buffer(cs, bo, cs->sparse_buffers, cs->num_sparse_buffers);
@@ -618,7 +608,7 @@ static int amdgpu_lookup_or_add_sparse_buffer(struct amdgpu_winsys *ws,
buffer = &cs->sparse_buffers[idx];
memset(buffer, 0, sizeof(*buffer));
- amdgpu_winsys_bo_reference(ws, &buffer->bo, bo);
+ amdgpu_winsys_bo_reference(cs->ws, &buffer->bo, bo);
cs->num_sparse_buffers++;
hash = bo->unique_id & (BUFFER_HASHLIST_SIZE-1);
@@ -643,15 +633,13 @@ static int amdgpu_lookup_or_add_sparse_buffer(struct amdgpu_winsys *ws,
static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs,
struct pb_buffer *buf,
- enum radeon_bo_usage usage,
- enum radeon_bo_domain domains,
- enum radeon_bo_priority priority)
+ unsigned usage,
+ enum radeon_bo_domain domains)
{
/* Don't use the "domains" parameter. Amdgpu doesn't support changing
* the buffer placement during command submission.
*/
- struct amdgpu_cs *acs = amdgpu_cs(rcs);
- struct amdgpu_cs_context *cs = acs->csc;
+ struct amdgpu_cs_context *cs = (struct amdgpu_cs_context*)rcs->csc;
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
struct amdgpu_cs_buffer *buffer;
int index;
@@ -661,43 +649,43 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs,
* are outside of the winsys.
*/
if (bo == cs->last_added_bo &&
- (usage & cs->last_added_bo_usage) == usage &&
- (1u << priority) & cs->last_added_bo_priority_usage)
+ (usage & cs->last_added_bo_usage) == usage)
return cs->last_added_bo_index;
if (!(bo->base.usage & RADEON_FLAG_SPARSE)) {
if (!bo->bo) {
- index = amdgpu_lookup_or_add_slab_buffer(acs->ws, rcs, acs, bo);
+ index = amdgpu_lookup_or_add_slab_buffer(rcs, cs, bo);
if (index < 0)
return 0;
buffer = &cs->slab_buffers[index];
buffer->usage |= usage;
+ cs->last_added_bo_usage = buffer->usage;
- usage &= ~RADEON_USAGE_SYNCHRONIZED;
- index = buffer->u.slab.real_idx;
+ index = buffer->slab_real_idx;
+ buffer = &cs->real_buffers[index];
+ buffer->usage |= usage & ~RADEON_USAGE_SYNCHRONIZED;
} else {
- index = amdgpu_lookup_or_add_real_buffer(rcs, acs, bo);
+ index = amdgpu_lookup_or_add_real_buffer(rcs, cs, bo);
if (index < 0)
return 0;
- }
- buffer = &cs->real_buffers[index];
+ buffer = &cs->real_buffers[index];
+ buffer->usage |= usage;
+ cs->last_added_bo_usage = buffer->usage;
+ }
} else {
- index = amdgpu_lookup_or_add_sparse_buffer(acs->ws, rcs, acs, bo);
+ index = amdgpu_lookup_or_add_sparse_buffer(rcs, cs, bo);
if (index < 0)
return 0;
buffer = &cs->sparse_buffers[index];
+ buffer->usage |= usage;
+ cs->last_added_bo_usage = buffer->usage;
}
- buffer->u.real.priority_usage |= 1u << priority;
- buffer->usage |= usage;
-
cs->last_added_bo = bo;
cs->last_added_bo_index = index;
- cs->last_added_bo_usage = buffer->usage;
- cs->last_added_bo_priority_usage = buffer->u.real.priority_usage;
return index;
}
@@ -807,7 +795,7 @@ static bool amdgpu_get_new_ib(struct amdgpu_winsys *ws,
ib->ptr_ib_size_inside_ib = false;
amdgpu_cs_add_buffer(cs->main.rcs, ib->big_ib_buffer,
- RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);
+ RADEON_USAGE_READ | RADEON_PRIO_IB, 0);
rcs->current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space);
@@ -990,13 +978,16 @@ amdgpu_cs_create(struct radeon_cmdbuf *rcs,
memset(cs->buffer_indices_hashlist, -1, sizeof(cs->buffer_indices_hashlist));
/* Set the first submission context as current. */
- cs->csc = &cs->csc1;
+ rcs->csc = cs->csc = &cs->csc1;
cs->cst = &cs->csc2;
/* Assign to both amdgpu_cs_context; only csc will use it. */
cs->csc1.buffer_indices_hashlist = cs->buffer_indices_hashlist;
cs->csc2.buffer_indices_hashlist = cs->buffer_indices_hashlist;
+ cs->csc1.ws = ctx->ws;
+ cs->csc2.ws = ctx->ws;
+
cs->main.rcs = rcs;
rcs->priv = cs;
@@ -1060,8 +1051,8 @@ amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_i
assert(!cs->preamble_ib_bo);
cs->preamble_ib_bo = preamble_bo;
- amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, RADEON_USAGE_READ, 0,
- RADEON_PRIO_IB1);
+ amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo,
+ RADEON_USAGE_READ | RADEON_PRIO_IB, 0);
return true;
}
@@ -1070,38 +1061,31 @@ static bool amdgpu_cs_validate(struct radeon_cmdbuf *rcs)
return true;
}
-static bool amdgpu_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw,
- bool force_chaining)
+static bool amdgpu_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw)
{
struct amdgpu_cs *cs = amdgpu_cs(rcs);
struct amdgpu_ib *ib = &cs->main;
- unsigned cs_epilog_dw = amdgpu_cs_epilog_dws(cs);
- unsigned need_byte_size = (dw + cs_epilog_dw) * 4;
assert(rcs->current.cdw <= rcs->current.max_dw);
/* 125% of the size for IB epilog. */
- unsigned safe_byte_size = need_byte_size + need_byte_size / 4;
- ib->max_check_space_size = MAX2(ib->max_check_space_size,
- safe_byte_size);
-
- /* If force_chaining is true, we can't return. We have to chain. */
- if (!force_chaining) {
- unsigned requested_size = rcs->prev_dw + rcs->current.cdw + dw;
+ unsigned requested_size = rcs->prev_dw + rcs->current.cdw + dw;
- if (requested_size > IB_MAX_SUBMIT_DWORDS)
- return false;
+ if (requested_size > IB_MAX_SUBMIT_DWORDS)
+ return false;
- ib->max_ib_size = MAX2(ib->max_ib_size, requested_size);
+ if (rcs->current.max_dw - rcs->current.cdw >= dw)
+ return true;
- if (rcs->current.max_dw - rcs->current.cdw >= dw)
- return true;
- }
+ unsigned cs_epilog_dw = amdgpu_cs_epilog_dws(cs);
+ unsigned need_byte_size = (dw + cs_epilog_dw) * 4;
+ unsigned safe_byte_size = need_byte_size + need_byte_size / 4;
+ ib->max_check_space_size = MAX2(ib->max_check_space_size,
+ safe_byte_size);
+ ib->max_ib_size = MAX2(ib->max_ib_size, requested_size);
- if (!cs->has_chaining) {
- assert(!force_chaining);
+ if (!cs->has_chaining)
return false;
- }
/* Allocate a new chunk */
if (rcs->num_prev >= rcs->max_prev) {
@@ -1159,7 +1143,7 @@ static bool amdgpu_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw,
rcs->gpu_address = va;
amdgpu_cs_add_buffer(cs->main.rcs, ib->big_ib_buffer,
- RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);
+ RADEON_USAGE_READ | RADEON_PRIO_IB, 0);
return true;
}
@@ -1174,7 +1158,7 @@ static unsigned amdgpu_cs_get_buffer_list(struct radeon_cmdbuf *rcs,
for (i = 0; i < cs->num_real_buffers; i++) {
list[i].bo_size = cs->real_buffers[i].bo->base.size;
list[i].vm_address = cs->real_buffers[i].bo->va;
- list[i].priority_usage = cs->real_buffers[i].u.real.priority_usage;
+ list[i].priority_usage = cs->real_buffers[i].usage;
}
}
return cs->num_real_buffers;
@@ -1214,9 +1198,7 @@ static bool is_noop_fence_dependency(struct amdgpu_cs *acs,
acs->ws->info.num_rings[acs->ring_type] == 1) &&
!amdgpu_fence_is_syncobj(fence) &&
fence->ctx == acs->ctx &&
- fence->fence.ip_type == cs->ib[IB_MAIN].ip_type &&
- fence->fence.ip_instance == cs->ib[IB_MAIN].ip_instance &&
- fence->fence.ring == cs->ib[IB_MAIN].ring)
+ fence->fence.ip_type == cs->ib[IB_MAIN].ip_type)
return true;
return amdgpu_fence_wait((void *)fence, 0, false);
@@ -1232,9 +1214,6 @@ static void amdgpu_cs_add_fence_dependency(struct radeon_cmdbuf *rws,
util_queue_fence_wait(&fence->submitted);
- /* Start fences are not needed here. */
- assert(!(dependency_flags & RADEON_DEPENDENCY_START_FENCE));
-
if (is_noop_fence_dependency(acs, fence))
return;
@@ -1245,13 +1224,14 @@ static void amdgpu_cs_add_fence_dependency(struct radeon_cmdbuf *rws,
}
static void amdgpu_add_bo_fence_dependencies(struct amdgpu_cs *acs,
+ struct amdgpu_cs_context *cs,
struct amdgpu_cs_buffer *buffer)
{
- struct amdgpu_cs_context *cs = acs->csc;
struct amdgpu_winsys_bo *bo = buffer->bo;
unsigned new_num_fences = 0;
+ const unsigned num_fences = bo->num_fences;
- for (unsigned j = 0; j < bo->num_fences; ++j) {
+ for (unsigned j = 0; j < num_fences; ++j) {
struct amdgpu_fence *bo_fence = (void *)bo->fences[j];
if (is_noop_fence_dependency(acs, bo_fence))
@@ -1266,7 +1246,7 @@ static void amdgpu_add_bo_fence_dependencies(struct amdgpu_cs *acs,
add_fence_to_list(&cs->fence_dependencies, bo_fence);
}
- for (unsigned j = new_num_fences; j < bo->num_fences; ++j)
+ for (unsigned j = new_num_fences; j < num_fences; ++j)
amdgpu_fence_reference(&bo->fences[j], NULL);
bo->num_fences = new_num_fences;
@@ -1308,14 +1288,25 @@ void amdgpu_add_fences(struct amdgpu_winsys_bo *bo,
}
}
+ unsigned bo_num_fences = bo->num_fences;
+
for (unsigned i = 0; i < num_fences; ++i) {
- bo->fences[bo->num_fences] = NULL;
- amdgpu_fence_reference(&bo->fences[bo->num_fences], fences[i]);
- bo->num_fences++;
+ bo->fences[bo_num_fences] = NULL;
+ amdgpu_fence_reference(&bo->fences[bo_num_fences], fences[i]);
+ bo_num_fences++;
}
+ bo->num_fences = bo_num_fences;
+}
+
+static void amdgpu_inc_bo_num_active_ioctls(unsigned num_buffers,
+ struct amdgpu_cs_buffer *buffers)
+{
+ for (unsigned i = 0; i < num_buffers; i++)
+ p_atomic_inc(&buffers[i].bo->num_active_ioctls);
}
static void amdgpu_add_fence_dependencies_bo_list(struct amdgpu_cs *acs,
+ struct amdgpu_cs_context *cs,
struct pipe_fence_handle *fence,
unsigned num_buffers,
struct amdgpu_cs_buffer *buffers)
@@ -1324,8 +1315,7 @@ static void amdgpu_add_fence_dependencies_bo_list(struct amdgpu_cs *acs,
struct amdgpu_cs_buffer *buffer = &buffers[i];
struct amdgpu_winsys_bo *bo = buffer->bo;
- amdgpu_add_bo_fence_dependencies(acs, buffer);
- p_atomic_inc(&bo->num_active_ioctls);
+ amdgpu_add_bo_fence_dependencies(acs, cs, buffer);
amdgpu_add_fences(bo, 1, &fence);
}
}
@@ -1333,13 +1323,12 @@ static void amdgpu_add_fence_dependencies_bo_list(struct amdgpu_cs *acs,
/* Since the kernel driver doesn't synchronize execution between different
* rings automatically, we have to add fence dependencies manually.
*/
-static void amdgpu_add_fence_dependencies_bo_lists(struct amdgpu_cs *acs)
+static void amdgpu_add_fence_dependencies_bo_lists(struct amdgpu_cs *acs,
+ struct amdgpu_cs_context *cs)
{
- struct amdgpu_cs_context *cs = acs->csc;
-
- amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_real_buffers, cs->real_buffers);
- amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_slab_buffers, cs->slab_buffers);
- amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_sparse_buffers, cs->sparse_buffers);
+ amdgpu_add_fence_dependencies_bo_list(acs, cs, cs->fence, cs->num_real_buffers, cs->real_buffers);
+ amdgpu_add_fence_dependencies_bo_list(acs, cs, cs->fence, cs->num_slab_buffers, cs->slab_buffers);
+ amdgpu_add_fence_dependencies_bo_list(acs, cs, cs->fence, cs->num_sparse_buffers, cs->sparse_buffers);
}
static void amdgpu_cs_add_syncobj_signal(struct radeon_cmdbuf *rws,
@@ -1358,8 +1347,7 @@ static void amdgpu_cs_add_syncobj_signal(struct radeon_cmdbuf *rws,
* This is done late, during submission, to keep the buffer list short before
* submit, and to avoid managing fences for the backing buffers.
*/
-static bool amdgpu_add_sparse_backing_buffers(struct amdgpu_winsys *ws,
- struct amdgpu_cs_context *cs)
+static bool amdgpu_add_sparse_backing_buffers(struct amdgpu_cs_context *cs)
{
for (unsigned i = 0; i < cs->num_sparse_buffers; ++i) {
struct amdgpu_cs_buffer *buffer = &cs->sparse_buffers[i];
@@ -1371,14 +1359,14 @@ static bool amdgpu_add_sparse_backing_buffers(struct amdgpu_winsys *ws,
/* We can directly add the buffer here, because we know that each
* backing buffer occurs only once.
*/
- int idx = amdgpu_do_add_real_buffer(ws, cs, backing->bo);
+ int idx = amdgpu_do_add_real_buffer(cs, backing->bo);
if (idx < 0) {
fprintf(stderr, "%s: failed to add buffer\n", __FUNCTION__);
simple_mtx_unlock(&bo->lock);
return false;
}
- cs->real_buffers[idx].u.real.priority_usage = buffer->u.real.priority_usage;
+ cs->real_buffers[idx].usage = buffer->usage;
}
simple_mtx_unlock(&bo->lock);
@@ -1400,6 +1388,10 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
struct drm_amdgpu_bo_list_in bo_list_in;
unsigned initial_num_real_buffers = cs->num_real_buffers;
+ simple_mtx_lock(&ws->bo_fence_lock);
+ amdgpu_add_fence_dependencies_bo_lists(acs, cs);
+ simple_mtx_unlock(&ws->bo_fence_lock);
+
#if DEBUG
/* Prepare the buffer list. */
if (ws->debug_all_bos) {
@@ -1427,7 +1419,7 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
} else
#endif
{
- if (!amdgpu_add_sparse_backing_buffers(ws, cs)) {
+ if (!amdgpu_add_sparse_backing_buffers(cs)) {
fprintf(stderr, "amdgpu: amdgpu_add_sparse_backing_buffers failed\n");
r = -ENOMEM;
goto cleanup;
@@ -1439,10 +1431,10 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
unsigned num_handles = 0;
for (i = 0; i < cs->num_real_buffers; ++i) {
struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i];
- assert(buffer->u.real.priority_usage != 0);
list[num_handles].bo_handle = buffer->bo->u.real.kms_handle;
- list[num_handles].bo_priority = (util_last_bit(buffer->u.real.priority_usage) - 1) / 2;
+ list[num_handles].bo_priority =
+ (util_last_bit(buffer->usage & RADEON_ALL_PRIORITIES) - 1) / 2;
++num_handles;
}
@@ -1673,7 +1665,7 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
radeon_emit(rcs, 0xf0000000); /* NOP packet */
} else {
while (rcs->current.cdw & ib_pad_dw_mask)
- radeon_emit(rcs, 0x00000000); /* NOP packet */
+ radeon_emit(rcs, SDMA_NOP_PAD);
}
break;
case RING_GFX:
@@ -1730,26 +1722,19 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
cs->next_fence = NULL;
} else {
cur->fence = amdgpu_fence_create(cs->ctx,
- cur->ib[IB_MAIN].ip_type,
- cur->ib[IB_MAIN].ip_instance,
- cur->ib[IB_MAIN].ring);
+ cur->ib[IB_MAIN].ip_type);
}
if (fence)
amdgpu_fence_reference(fence, cur->fence);
- amdgpu_cs_sync_flush(rcs);
+ amdgpu_inc_bo_num_active_ioctls(cur->num_real_buffers, cur->real_buffers);
+ amdgpu_inc_bo_num_active_ioctls(cur->num_slab_buffers, cur->slab_buffers);
+ amdgpu_inc_bo_num_active_ioctls(cur->num_sparse_buffers, cur->sparse_buffers);
- /* Prepare buffers.
- *
- * This fence must be held until the submission is queued to ensure
- * that the order of fence dependency updates matches the order of
- * submissions.
- */
- simple_mtx_lock(&ws->bo_fence_lock);
- amdgpu_add_fence_dependencies_bo_lists(cs);
+ amdgpu_cs_sync_flush(rcs);
/* Swap command streams. "cst" is going to be submitted. */
- cs->csc = cs->cst;
+ rcs->csc = cs->csc = cs->cst;
cs->cst = cur;
/* Submit. */
@@ -1761,9 +1746,6 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
else
cs->csc->secure = cs->cst->secure;
- /* The submission has been queued, unlock the fence now. */
- simple_mtx_unlock(&ws->bo_fence_lock);
-
if (!(flags & PIPE_FLUSH_ASYNC)) {
amdgpu_cs_sync_flush(rcs);
error_code = cur->error_code;
@@ -1779,8 +1761,8 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
amdgpu_get_new_ib(ws, rcs, &cs->main, cs);
if (cs->preamble_ib_bo) {
- amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, RADEON_USAGE_READ, 0,
- RADEON_PRIO_IB1);
+ amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo,
+ RADEON_USAGE_READ | RADEON_PRIO_IB, 0);
}
rcs->used_gart_kb = 0;
@@ -1815,7 +1797,7 @@ static void amdgpu_cs_destroy(struct radeon_cmdbuf *rcs)
static bool amdgpu_bo_is_referenced(struct radeon_cmdbuf *rcs,
struct pb_buffer *_buf,
- enum radeon_bo_usage usage)
+ unsigned usage)
{
struct amdgpu_cs *cs = amdgpu_cs(rcs);
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)_buf;
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index 1ce8e5bae..794d13bd0 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -50,15 +50,8 @@ struct amdgpu_ctx {
struct amdgpu_cs_buffer {
struct amdgpu_winsys_bo *bo;
- union {
- struct {
- uint32_t priority_usage;
- } real;
- struct {
- uint32_t real_idx; /* index of underlying real BO */
- } slab;
- } u;
- enum radeon_bo_usage usage;
+ unsigned slab_real_idx; /* index of underlying real BO, used by slab buffers only */
+ unsigned usage;
};
enum ib_type {
@@ -97,6 +90,8 @@ struct amdgpu_cs_context {
struct drm_amdgpu_cs_chunk_ib ib[IB_NUM];
uint32_t *ib_main_addr; /* the beginning of IB before chaining */
+ struct amdgpu_winsys *ws;
+
/* Buffers. */
unsigned max_real_buffers;
unsigned num_real_buffers;
@@ -115,7 +110,6 @@ struct amdgpu_cs_context {
struct amdgpu_winsys_bo *last_added_bo;
unsigned last_added_bo_index;
unsigned last_added_bo_usage;
- uint32_t last_added_bo_priority_usage;
struct amdgpu_fence_list fence_dependencies;
struct amdgpu_fence_list syncobj_dependencies;
@@ -130,7 +124,8 @@ struct amdgpu_cs_context {
bool secure;
};
-#define BUFFER_HASHLIST_SIZE 4096
+/* This high limit is needed for viewperf2020/catia. */
+#define BUFFER_HASHLIST_SIZE 32768
struct amdgpu_cs {
struct amdgpu_ib main; /* must be first because this is inherited */
@@ -243,7 +238,7 @@ amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
static inline bool
amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
struct amdgpu_winsys_bo *bo,
- enum radeon_bo_usage usage)
+ unsigned usage)
{
int index;
struct amdgpu_cs_buffer *buffer;
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_public.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_public.h
index 8702e4f6e..f403ed997 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_public.h
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_public.h
@@ -28,14 +28,12 @@
#define AMDGPU_PUBLIC_H
#include "pipe/p_defines.h"
+#include "gallium/winsys/radeon/drm/radeon_drm_public.h"
struct radeon_winsys;
struct pipe_screen;
struct pipe_screen_config;
-typedef struct pipe_screen *(*radeon_screen_create_t)(struct radeon_winsys *,
- const struct pipe_screen_config *config);
-
struct radeon_winsys *
amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
radeon_screen_create_t screen_create);
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index 1b3acf13b..533df39c3 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -62,7 +62,7 @@ static int amdgpu_surface_sanity(const struct pipe_resource *tex)
static int amdgpu_surface_init(struct radeon_winsys *rws,
const struct pipe_resource *tex,
- unsigned flags, unsigned bpe,
+ uint64_t flags, unsigned bpe,
enum radeon_surf_mode mode,
struct radeon_surf *surf)
{
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 06dcf3369..cc71b0e92 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -63,7 +63,8 @@ static void handle_env_var_force_family(struct amdgpu_winsys *ws)
if (!strcmp(family, ac_get_llvm_processor_name(i))) {
/* Override family and chip_class. */
ws->info.family = i;
- ws->info.name = "GCN-NOOP";
+ ws->info.name = "NOOP";
+ strcpy(ws->info.lowercase_name , "noop");
if (i >= CHIP_SIENNA_CICHLID)
ws->info.chip_class = GFX10_3;
@@ -115,7 +116,8 @@ static bool do_winsys_init(struct amdgpu_winsys *ws,
ws->debug_all_bos = debug_get_option_all_bos();
#endif
ws->reserve_vmid = strstr(debug_get_option("R600_DEBUG", ""), "reserve_vmid") != NULL ||
- strstr(debug_get_option("AMD_DEBUG", ""), "reserve_vmid") != NULL;
+ strstr(debug_get_option("AMD_DEBUG", ""), "reserve_vmid") != NULL ||
+ strstr(debug_get_option("AMD_DEBUG", ""), "sqtt") != NULL;
ws->zero_all_vram_allocs = strstr(debug_get_option("R600_DEBUG", ""), "zerovram") != NULL ||
driQueryOptionb(config->options, "radeonsi_zerovram");
@@ -153,7 +155,7 @@ static void do_winsys_deinit(struct amdgpu_winsys *ws)
FREE(ws);
}
-static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
+static void amdgpu_winsys_destroy_locked(struct radeon_winsys *rws, bool locked)
{
struct amdgpu_screen_winsys *sws = amdgpu_screen_winsys(rws);
struct amdgpu_winsys *ws = sws->aws;
@@ -165,7 +167,8 @@ static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
* amdgpu_winsys_create in another thread doesn't get the winsys
* from the table when the counter drops to 0.
*/
- simple_mtx_lock(&dev_tab_mutex);
+ if (!locked)
+ simple_mtx_lock(&dev_tab_mutex);
destroy = pipe_reference(&ws->reference, NULL);
if (destroy && dev_tab) {
@@ -176,7 +179,8 @@ static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
}
}
- simple_mtx_unlock(&dev_tab_mutex);
+ if (!locked)
+ simple_mtx_unlock(&dev_tab_mutex);
if (destroy)
do_winsys_deinit(ws);
@@ -185,6 +189,11 @@ static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
FREE(rws);
}
+static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
+{
+ amdgpu_winsys_destroy_locked(rws, false);
+}
+
static void amdgpu_winsys_query_info(struct radeon_winsys *rws,
struct radeon_info *info,
bool enable_smart_access_memory,
@@ -554,7 +563,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
* and link all drivers into one binary blob. */
ws->base.screen = screen_create(&ws->base, config);
if (!ws->base.screen) {
- amdgpu_winsys_destroy(&ws->base);
+ amdgpu_winsys_destroy_locked(&ws->base, true);
simple_mtx_unlock(&dev_tab_mutex);
return NULL;
}