author    Jonathan Gray <jsg@cvs.openbsd.org>    2020-01-22 02:09:57 +0000
committer Jonathan Gray <jsg@cvs.openbsd.org>    2020-01-22 02:09:57 +0000
commit    851ab880f8cbf08cc4d343259210addcb2715c09 (patch)
tree      6fc84348c314a83a7344976a464ceb7bd2ed1b9d
parent    1bea6551ba344ebede0403ac0943f701707cd9af (diff)
Import Mesa 19.2.8
-rw-r--r--  lib/mesa/src/gallium/drivers/radeonsi/gfx10_query.c | 792
1 file changed, 426 insertions(+), 366 deletions(-)
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/gfx10_query.c b/lib/mesa/src/gallium/drivers/radeonsi/gfx10_query.c
index 98ee6ba3d..56ecbd548 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/gfx10_query.c
+++ b/lib/mesa/src/gallium/drivers/radeonsi/gfx10_query.c
@@ -22,442 +22,502 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+#include <stddef.h>
+
#include "si_pipe.h"
#include "si_query.h"
-#include "sid.h"
#include "util/u_memory.h"
#include "util/u_suballoc.h"
+#include "sid.h"
-#include <stddef.h>
+/**
+ * The query buffer is written to by ES/GS NGG shaders with statistics about
+ * generated and (streamout-)emitted primitives.
+ *
+ * The context maintains a ring of these query buffers, and queries simply
+ * point into the ring, allowing an arbitrary number of queries to be active
+ * without additional GPU cost.
+ */
+struct gfx10_sh_query_buffer {
+ struct list_head list;
+ struct si_resource *buf;
+ unsigned refcount;
+
+ /* Offset into the buffer in bytes; points at the first un-emitted entry. */
+ unsigned head;
+};
+
+/* Memory layout of the query buffer. Must be kept in sync with shaders
+ * (including QBO shaders) and should be aligned to cachelines.
+ *
+ * The somewhat awkward memory layout is for compatibility with the
+ * SET_PREDICATION packet, which also means that we're setting the high bit
+ * of all those values unconditionally.
+ */
+struct gfx10_sh_query_buffer_mem {
+ struct {
+ uint64_t generated_primitives_start_dummy;
+ uint64_t emitted_primitives_start_dummy;
+ uint64_t generated_primitives;
+ uint64_t emitted_primitives;
+ } stream[4];
+ uint32_t fence; /* bottom-of-pipe fence: set to ~0 when draws have finished */
+ uint32_t pad[31];
+};
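
Since the comment requires this layout to stay in sync with the QBO shaders and stay cacheline-aligned, the arithmetic can be spot-checked at compile time. A minimal sketch (these asserts are illustrative, not part of the import; they rely only on the struct above and <stddef.h>):

/* Illustrative layout check: 4 streams * 4 counters * 8 bytes = 128 bytes,
 * then a 4-byte fence and 31 * 4 = 124 bytes of padding, giving 256 bytes
 * per entry, a whole number of 64-byte cachelines. */
_Static_assert(offsetof(struct gfx10_sh_query_buffer_mem, fence) == 128,
               "counters must occupy the first 128 bytes");
_Static_assert(sizeof(struct gfx10_sh_query_buffer_mem) == 256,
               "one entry per 256 bytes, as the init loop below assumes");
_Static_assert(sizeof(struct gfx10_sh_query_buffer_mem) % 64 == 0,
               "entries must be cacheline-aligned");
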
+
+/* Shader-based queries. */
+struct gfx10_sh_query {
+ struct si_query b;
+
+ struct gfx10_sh_query_buffer *first;
+ struct gfx10_sh_query_buffer *last;
+ unsigned first_begin;
+ unsigned last_end;
+
+ unsigned stream;
+};
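
Putting the three structs together: an active query is just a byte range over the ring, from first_begin inside its first buffer to last_end inside its last. A hypothetical helper (not in the import) that walks that range the same way gfx10_sh_query_get_result below does, only forwards:

/* Hypothetical sketch: count how many gfx10_sh_query_buffer_mem entries
 * a query covers, walking the ring from first to last. */
static unsigned gfx10_sh_query_entry_count(const struct gfx10_sh_query *q)
{
   unsigned count = 0;
   struct gfx10_sh_query_buffer *qbuf = q->first;

   for (;;) {
      unsigned begin = (qbuf == q->first) ? q->first_begin : 0;
      unsigned end = (qbuf == q->last) ? q->last_end : qbuf->head;

      count += (end - begin) / sizeof(struct gfx10_sh_query_buffer_mem);

      if (qbuf == q->last)
         break;
      qbuf = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.next, list);
   }
   return count;
}
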
static void emit_shader_query(struct si_context *sctx)
{
- assert(!list_is_empty(&sctx->shader_query_buffers));
+ assert(!LIST_IS_EMPTY(&sctx->shader_query_buffers));
- struct gfx10_sh_query_buffer *qbuf =
- list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
- qbuf->head += sizeof(struct gfx10_sh_query_buffer_mem);
+ struct gfx10_sh_query_buffer *qbuf = list_last_entry(&sctx->shader_query_buffers,
+ struct gfx10_sh_query_buffer, list);
+ qbuf->head += sizeof(struct gfx10_sh_query_buffer_mem);
}
static void gfx10_release_query_buffers(struct si_context *sctx,
- struct gfx10_sh_query_buffer *first,
- struct gfx10_sh_query_buffer *last)
+ struct gfx10_sh_query_buffer *first,
+ struct gfx10_sh_query_buffer *last)
{
- while (first) {
- struct gfx10_sh_query_buffer *qbuf = first;
- if (first != last)
- first = list_entry(qbuf->list.next, struct gfx10_sh_query_buffer, list);
- else
- first = NULL;
-
- qbuf->refcount--;
- if (qbuf->refcount)
- continue;
-
- if (qbuf->list.next == &sctx->shader_query_buffers)
- continue; /* keep the most recent buffer; it may not be full yet */
- if (qbuf->list.prev == &sctx->shader_query_buffers)
- continue; /* keep the oldest buffer for recycling */
-
- list_del(&qbuf->list);
- si_resource_reference(&qbuf->buf, NULL);
- FREE(qbuf);
- }
+ while (first) {
+ struct gfx10_sh_query_buffer *qbuf = first;
+ if (first != last)
+ first = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.next, list);
+ else
+ first = NULL;
+
+ qbuf->refcount--;
+ if (qbuf->refcount)
+ continue;
+
+ if (qbuf->list.next == &sctx->shader_query_buffers)
+ continue; /* keep the most recent buffer; it may not be full yet */
+ if (qbuf->list.prev == &sctx->shader_query_buffers)
+ continue; /* keep the oldest buffer for recycling */
+
+ LIST_DEL(&qbuf->list);
+ si_resource_reference(&qbuf->buf, NULL);
+ FREE(qbuf);
+ }
}
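
The two continue branches above encode the recycling policy: even at refcount zero, the newest buffer stays on the list (it may not be full yet) and the oldest stays (gfx10_alloc_query_buffer below tries to recycle it). Expressed as a single predicate under the same list conventions (illustrative only):

/* Illustrative: a ring buffer may actually be freed only when it is
 * unreferenced and is neither the newest nor the oldest list element
 * (the sentinel head is &sctx->shader_query_buffers). */
static bool gfx10_can_free_qbuf(struct si_context *sctx,
                                struct gfx10_sh_query_buffer *qbuf)
{
   return !qbuf->refcount &&
          qbuf->list.next != &sctx->shader_query_buffers && /* not newest */
          qbuf->list.prev != &sctx->shader_query_buffers;   /* not oldest */
}
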
static bool gfx10_alloc_query_buffer(struct si_context *sctx)
{
- if (si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query))
- return true;
-
- struct gfx10_sh_query_buffer *qbuf = NULL;
-
- if (!list_is_empty(&sctx->shader_query_buffers)) {
- qbuf = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
- if (qbuf->head + sizeof(struct gfx10_sh_query_buffer_mem) <= qbuf->buf->b.b.width0)
- goto success;
-
- qbuf = list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
- if (!qbuf->refcount &&
- !si_cs_is_buffer_referenced(sctx, qbuf->buf->buf, RADEON_USAGE_READWRITE) &&
- sctx->ws->buffer_wait(sctx->ws, qbuf->buf->buf, 0, RADEON_USAGE_READWRITE)) {
- /* Can immediately re-use the oldest buffer */
- list_del(&qbuf->list);
- } else {
- qbuf = NULL;
- }
- }
-
- if (!qbuf) {
- qbuf = CALLOC_STRUCT(gfx10_sh_query_buffer);
- if (unlikely(!qbuf))
- return false;
-
- struct si_screen *screen = sctx->screen;
- unsigned buf_size =
- MAX2(sizeof(struct gfx10_sh_query_buffer_mem), screen->info.min_alloc_size);
- qbuf->buf = si_resource(pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size));
- if (unlikely(!qbuf->buf)) {
- FREE(qbuf);
- return false;
- }
- }
-
- /* The buffer is currently unused by the GPU. Initialize it.
- *
- * We need to set the high bit of all the primitive counters for
- * compatibility with the SET_PREDICATION packet.
- */
- uint64_t *results = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL,
- PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);
- assert(results);
-
- for (unsigned i = 0, e = qbuf->buf->b.b.width0 / sizeof(struct gfx10_sh_query_buffer_mem); i < e;
- ++i) {
- for (unsigned j = 0; j < 16; ++j)
- results[32 * i + j] = (uint64_t)1 << 63;
- results[32 * i + 16] = 0;
- }
-
- list_addtail(&qbuf->list, &sctx->shader_query_buffers);
- qbuf->head = 0;
- qbuf->refcount = sctx->num_active_shader_queries;
+ if (si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query))
+ return true;
+
+ struct gfx10_sh_query_buffer *qbuf = NULL;
+
+ if (!LIST_IS_EMPTY(&sctx->shader_query_buffers)) {
+ qbuf = list_last_entry(&sctx->shader_query_buffers,
+ struct gfx10_sh_query_buffer, list);
+ if (qbuf->head + sizeof(struct gfx10_sh_query_buffer_mem) <= qbuf->buf->b.b.width0)
+ goto success;
+
+ qbuf = list_first_entry(&sctx->shader_query_buffers,
+ struct gfx10_sh_query_buffer, list);
+ if (!qbuf->refcount &&
+ !si_rings_is_buffer_referenced(sctx, qbuf->buf->buf, RADEON_USAGE_READWRITE) &&
+ sctx->ws->buffer_wait(qbuf->buf->buf, 0, RADEON_USAGE_READWRITE)) {
+ /* Can immediately re-use the oldest buffer */
+ LIST_DEL(&qbuf->list);
+ } else {
+ qbuf = NULL;
+ }
+ }
+
+ if (!qbuf) {
+ qbuf = CALLOC_STRUCT(gfx10_sh_query_buffer);
+ if (unlikely(!qbuf))
+ return false;
+
+ struct si_screen *screen = sctx->screen;
+ unsigned buf_size = MAX2(sizeof(struct gfx10_sh_query_buffer_mem),
+ screen->info.min_alloc_size);
+ qbuf->buf = si_resource(
+ pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size));
+ if (unlikely(!qbuf->buf)) {
+ FREE(qbuf);
+ return false;
+ }
+ }
+
+ /* The buffer is currently unused by the GPU. Initialize it.
+ *
+ * We need to set the high bit of all the primitive counters for
+ * compatibility with the SET_PREDICATION packet.
+ */
+ uint64_t *results = sctx->ws->buffer_map(qbuf->buf->buf, NULL,
+ PIPE_TRANSFER_WRITE |
+ PIPE_TRANSFER_UNSYNCHRONIZED);
+ assert(results);
+
+ for (unsigned i = 0, e = qbuf->buf->b.b.width0 / sizeof(struct gfx10_sh_query_buffer_mem);
+ i < e; ++i) {
+ for (unsigned j = 0; j < 16; ++j)
+ results[32 * i + j] = (uint64_t)1 << 63;
+ results[32 * i + 16] = 0;
+ }
+
+ LIST_ADDTAIL(&qbuf->list, &sctx->shader_query_buffers);
+ qbuf->head = 0;
+ qbuf->refcount = sctx->num_active_shader_queries;
success:;
- struct pipe_shader_buffer sbuf;
- sbuf.buffer = &qbuf->buf->b.b;
- sbuf.buffer_offset = qbuf->head;
- sbuf.buffer_size = sizeof(struct gfx10_sh_query_buffer_mem);
- si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, &sbuf);
- SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED, 1);
-
- si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_query);
- return true;
+ struct pipe_shader_buffer sbuf;
+ sbuf.buffer = &qbuf->buf->b.b;
+ sbuf.buffer_offset = qbuf->head;
+ sbuf.buffer_size = sizeof(struct gfx10_sh_query_buffer_mem);
+ si_set_rw_shader_buffer(sctx, GFX10_GS_QUERY_BUF, &sbuf);
+ sctx->current_vs_state |= S_VS_STATE_STREAMOUT_QUERY_ENABLED(1);
+
+ si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_query);
+ return true;
}
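
The initialization loop in gfx10_alloc_query_buffer views each 256-byte entry as 32 qwords: qwords 0..15 are the sixteen per-stream counters, which get bit 63 set for SET_PREDICATION, and qword 16 overlays the 4-byte fence plus the first pad word, which is cleared. The same writes expressed in struct terms (an illustrative equivalent, not the imported code):

/* Illustrative: what results[32 * i + j] = (uint64_t)1 << 63 and
 * results[32 * i + 16] = 0 mean for one entry. */
static void gfx10_init_query_entry(struct gfx10_sh_query_buffer_mem *qmem)
{
   for (unsigned s = 0; s < 4; ++s) {
      /* qwords 0..15: all four counters of all four streams */
      qmem->stream[s].generated_primitives_start_dummy = (uint64_t)1 << 63;
      qmem->stream[s].emitted_primitives_start_dummy   = (uint64_t)1 << 63;
      qmem->stream[s].generated_primitives             = (uint64_t)1 << 63;
      qmem->stream[s].emitted_primitives               = (uint64_t)1 << 63;
   }
   /* qword 16: zeroes the fence and the first pad word in one store */
   qmem->fence = 0;
   qmem->pad[0] = 0;
}
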
static void gfx10_sh_query_destroy(struct si_context *sctx, struct si_query *rquery)
{
- struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
- gfx10_release_query_buffers(sctx, query->first, query->last);
- FREE(query);
+ struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
+ gfx10_release_query_buffers(sctx, query->first, query->last);
+ FREE(query);
}
static bool gfx10_sh_query_begin(struct si_context *sctx, struct si_query *rquery)
{
- struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
+ struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
- gfx10_release_query_buffers(sctx, query->first, query->last);
- query->first = query->last = NULL;
+ gfx10_release_query_buffers(sctx, query->first, query->last);
+ query->first = query->last = NULL;
- if (unlikely(!gfx10_alloc_query_buffer(sctx)))
- return false;
+ if (unlikely(!gfx10_alloc_query_buffer(sctx)))
+ return false;
- query->first = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
- query->first_begin = query->first->head;
+ query->first = list_last_entry(&sctx->shader_query_buffers,
+ struct gfx10_sh_query_buffer, list);
+ query->first_begin = query->first->head;
- sctx->num_active_shader_queries++;
- query->first->refcount++;
+ sctx->num_active_shader_queries++;
+ query->first->refcount++;
- return true;
+ return true;
}
static bool gfx10_sh_query_end(struct si_context *sctx, struct si_query *rquery)
{
- struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
-
- if (unlikely(!query->first))
- return false; /* earlier out of memory error */
-
- query->last = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
- query->last_end = query->last->head;
-
- /* Signal the fence of the previous chunk */
- if (query->last_end != 0) {
- uint64_t fence_va = query->last->buf->gpu_address;
- fence_va += query->last_end - sizeof(struct gfx10_sh_query_buffer_mem);
- fence_va += offsetof(struct gfx10_sh_query_buffer_mem, fence);
- si_cp_release_mem(sctx, &sctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
- EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, query->last->buf, fence_va,
- 0xffffffff, PIPE_QUERY_GPU_FINISHED);
- }
-
- sctx->num_active_shader_queries--;
-
- if (sctx->num_active_shader_queries <= 0 || !si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query)) {
- si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, NULL);
- SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED, 0);
-
- /* If a query_begin is followed by a query_end without a draw
- * in-between, we need to clear the atom to ensure that the
- * next query_begin will re-initialize the shader buffer. */
- si_set_atom_dirty(sctx, &sctx->atoms.s.shader_query, false);
- }
-
- return true;
+ struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
+
+ if (unlikely(!query->first))
+ return false; /* earlier out of memory error */
+
+ query->last = list_last_entry(&sctx->shader_query_buffers,
+ struct gfx10_sh_query_buffer, list);
+ query->last_end = query->last->head;
+
+ /* Signal the fence of the previous chunk */
+ if (query->last_end != 0) {
+ uint64_t fence_va = query->last->buf->gpu_address;
+ fence_va += query->last_end - sizeof(struct gfx10_sh_query_buffer_mem);
+ fence_va += offsetof(struct gfx10_sh_query_buffer_mem, fence);
+ si_cp_release_mem(sctx, sctx->gfx_cs,
+ V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
+ EOP_DATA_SEL_VALUE_32BIT,
+ query->last->buf, fence_va, 0xffffffff,
+ PIPE_QUERY_GPU_FINISHED);
+ }
+
+ sctx->num_active_shader_queries--;
+
+ if (sctx->num_active_shader_queries > 0) {
+ gfx10_alloc_query_buffer(sctx);
+ } else {
+ si_set_rw_shader_buffer(sctx, GFX10_GS_QUERY_BUF, NULL);
+ sctx->current_vs_state &= C_VS_STATE_STREAMOUT_QUERY_ENABLED;
+
+ /* If a query_begin is followed by a query_end without a draw
+ * in-between, we need to clear the atom to ensure that the
+ * next query_begin will re-initialize the shader buffer. */
+ si_set_atom_dirty(sctx, &sctx->atoms.s.shader_query, false);
+ }
+
+ return true;
}
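
The fence address in gfx10_sh_query_end points into the entry that was just completed: since head (and hence last_end) has already advanced past it, the entry begins one gfx10_sh_query_buffer_mem before last_end. A worked instance of the same arithmetic (hypothetical helper):

/* Illustrative: with last_end == 256 (exactly one entry emitted), this
 * returns gpu_address + 0 + 128, i.e. the fence field of entry 0. */
static uint64_t gfx10_last_entry_fence_va(const struct gfx10_sh_query *q)
{
   uint64_t va = q->last->buf->gpu_address;
   va += q->last_end - sizeof(struct gfx10_sh_query_buffer_mem);
   va += offsetof(struct gfx10_sh_query_buffer_mem, fence);
   return va;
}
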
static void gfx10_sh_query_add_result(struct gfx10_sh_query *query,
- struct gfx10_sh_query_buffer_mem *qmem,
- union pipe_query_result *result)
+ struct gfx10_sh_query_buffer_mem *qmem,
+ union pipe_query_result *result)
{
- static const uint64_t mask = ((uint64_t)1 << 63) - 1;
-
- switch (query->b.type) {
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- result->u64 += qmem->stream[query->stream].emitted_primitives & mask;
- break;
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- result->u64 += qmem->stream[query->stream].generated_primitives & mask;
- break;
- case PIPE_QUERY_SO_STATISTICS:
- result->so_statistics.num_primitives_written +=
- qmem->stream[query->stream].emitted_primitives & mask;
- result->so_statistics.primitives_storage_needed +=
- qmem->stream[query->stream].generated_primitives & mask;
- break;
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- result->b |= qmem->stream[query->stream].emitted_primitives !=
- qmem->stream[query->stream].generated_primitives;
- break;
- case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
- for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream) {
- result->b |= qmem->stream[stream].emitted_primitives !=
- qmem->stream[stream].generated_primitives;
- }
- break;
- default:
- assert(0);
- }
+ static const uint64_t mask = ((uint64_t)1 << 63) - 1;
+
+ switch (query->b.type) {
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ result->u64 += qmem->stream[query->stream].emitted_primitives & mask;
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ result->u64 += qmem->stream[query->stream].generated_primitives & mask;
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ result->so_statistics.num_primitives_written +=
+ qmem->stream[query->stream].emitted_primitives & mask;
+ result->so_statistics.primitives_storage_needed +=
+ qmem->stream[query->stream].generated_primitives & mask;
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ result->b |= qmem->stream[query->stream].emitted_primitives !=
+ qmem->stream[query->stream].generated_primitives;
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream) {
+ result->b |= qmem->stream[stream].emitted_primitives !=
+ qmem->stream[stream].generated_primitives;
+ }
+ break;
+ default:
+ assert(0);
+ }
}
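
The mask used above strips the predication bit: every counter is written with bit 63 set (see the buffer initialization), so only the low 63 bits carry the actual count. For example:

/* Illustrative: recovering a primitive count of 42 from a stored value. */
uint64_t stored = ((uint64_t)1 << 63) | 42;   /* as written to the buffer */
uint64_t mask   = ((uint64_t)1 << 63) - 1;    /* same mask as above */
assert((stored & mask) == 42);
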
-static bool gfx10_sh_query_get_result(struct si_context *sctx, struct si_query *rquery, bool wait,
- union pipe_query_result *result)
+static bool gfx10_sh_query_get_result(struct si_context *sctx, struct si_query *rquery,
+ bool wait, union pipe_query_result *result)
{
- struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
+ struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
- util_query_clear_result(result, query->b.type);
+ util_query_clear_result(result, query->b.type);
- if (unlikely(!query->first))
- return false; /* earlier out of memory error */
- assert(query->last);
+ if (unlikely(!query->first))
+ return false; /* earlier out of memory error */
+ assert(query->last);
- for (struct gfx10_sh_query_buffer *qbuf = query->last;;
- qbuf = list_entry(qbuf->list.prev, struct gfx10_sh_query_buffer, list)) {
- unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK);
- void *map;
+ for (struct gfx10_sh_query_buffer *qbuf = query->last;;
+ qbuf = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.prev, list)) {
+ unsigned usage = PIPE_TRANSFER_READ |
+ (wait ? 0 : PIPE_TRANSFER_DONTBLOCK);
+ void *map;
- if (rquery->b.flushed)
- map = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, usage);
- else
- map = si_buffer_map(sctx, qbuf->buf, usage);
+ if (rquery->b.flushed)
+ map = sctx->ws->buffer_map(qbuf->buf->buf, NULL, usage);
+ else
+ map = si_buffer_map_sync_with_rings(sctx, qbuf->buf, usage);
- if (!map)
- return false;
+ if (!map)
+ return false;
- unsigned results_begin = 0;
- unsigned results_end = qbuf->head;
- if (qbuf == query->first)
- results_begin = query->first_begin;
- if (qbuf == query->last)
- results_end = query->last_end;
+ unsigned results_begin = 0;
+ unsigned results_end = qbuf->head;
+ if (qbuf == query->first)
+ results_begin = query->first_begin;
+ if (qbuf == query->last)
+ results_end = query->last_end;
- while (results_begin != results_end) {
- struct gfx10_sh_query_buffer_mem *qmem = map + results_begin;
- results_begin += sizeof(*qmem);
+ while (results_begin != results_end) {
+ struct gfx10_sh_query_buffer_mem *qmem = map + results_begin;
+ results_begin += sizeof(*qmem);
- gfx10_sh_query_add_result(query, qmem, result);
- }
+ gfx10_sh_query_add_result(query, qmem, result);
+ }
- if (qbuf == query->first)
- break;
- }
+ if (qbuf == query->first)
+ break;
+ }
- return true;
+ return true;
}
-static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct si_query *rquery,
- enum pipe_query_flags flags,
- enum pipe_query_value_type result_type,
- int index, struct pipe_resource *resource,
- unsigned offset)
+static void gfx10_sh_query_get_result_resource(struct si_context *sctx,
+ struct si_query *rquery,
+ bool wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset)
{
- struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
- struct si_qbo_state saved_state = {};
- struct pipe_resource *tmp_buffer = NULL;
- unsigned tmp_buffer_offset = 0;
-
- if (!sctx->sh_query_result_shader) {
- sctx->sh_query_result_shader = gfx10_create_sh_query_result_cs(sctx);
- if (!sctx->sh_query_result_shader)
- return;
- }
-
- if (query->first != query->last) {
- u_suballocator_alloc(&sctx->allocator_zeroed_memory, 16, 16, &tmp_buffer_offset, &tmp_buffer);
- if (!tmp_buffer)
- return;
- }
-
- si_save_qbo_state(sctx, &saved_state);
-
- /* Pre-fill the constants configuring the shader behavior. */
- struct {
- uint32_t config;
- uint32_t offset;
- uint32_t chain;
- uint32_t result_count;
- } consts;
- struct pipe_constant_buffer constant_buffer = {};
-
- if (index >= 0) {
- switch (query->b.type) {
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- consts.offset = 4 * sizeof(uint64_t) * query->stream + 2 * sizeof(uint64_t);
- consts.config = 0;
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- consts.offset = 4 * sizeof(uint64_t) * query->stream + 3 * sizeof(uint64_t);
- consts.config = 0;
- break;
- case PIPE_QUERY_SO_STATISTICS:
- consts.offset = sizeof(uint32_t) * (4 * index + query->stream);
- consts.config = 0;
- break;
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- consts.offset = 4 * sizeof(uint64_t) * query->stream;
- consts.config = 2;
- break;
- case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
- consts.offset = 0;
- consts.config = 3;
- break;
- default:
- unreachable("bad query type");
- }
- } else {
- /* Check result availability. */
- consts.offset = 0;
- consts.config = 1;
- }
-
- if (result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64)
- consts.config |= 8;
-
- constant_buffer.buffer_size = sizeof(consts);
- constant_buffer.user_buffer = &consts;
-
- /* Pre-fill the SSBOs and grid. */
- struct pipe_shader_buffer ssbo[3];
- struct pipe_grid_info grid = {};
-
- ssbo[1].buffer = tmp_buffer;
- ssbo[1].buffer_offset = tmp_buffer_offset;
- ssbo[1].buffer_size = 16;
-
- ssbo[2] = ssbo[1];
-
- grid.block[0] = 1;
- grid.block[1] = 1;
- grid.block[2] = 1;
- grid.grid[0] = 1;
- grid.grid[1] = 1;
- grid.grid[2] = 1;
-
- struct gfx10_sh_query_buffer *qbuf = query->first;
- for (;;) {
- unsigned begin = qbuf == query->first ? query->first_begin : 0;
- unsigned end = qbuf == query->last ? query->last_end : qbuf->buf->b.b.width0;
- if (!end)
- continue;
-
- ssbo[0].buffer = &qbuf->buf->b.b;
- ssbo[0].buffer_offset = begin;
- ssbo[0].buffer_size = end - begin;
-
- consts.result_count = (end - begin) / sizeof(struct gfx10_sh_query_buffer_mem);
- consts.chain = 0;
- if (qbuf != query->first)
- consts.chain |= 1;
- if (qbuf != query->last)
- consts.chain |= 2;
-
- if (qbuf == query->last) {
- ssbo[2].buffer = resource;
- ssbo[2].buffer_offset = offset;
- ssbo[2].buffer_size = 8;
- }
-
- sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, false, &constant_buffer);
-
- if (flags & PIPE_QUERY_WAIT) {
- uint64_t va;
-
- /* Wait for result availability. Wait only for readiness
- * of the last entry, since the fence writes should be
- * serialized in the CP.
- */
- va = qbuf->buf->gpu_address;
- va += end - sizeof(struct gfx10_sh_query_buffer_mem);
- va += offsetof(struct gfx10_sh_query_buffer_mem, fence);
-
- si_cp_wait_mem(sctx, &sctx->gfx_cs, va, 0x00000001, 0x00000001, 0);
- }
-
- /* ssbo[2] is either tmp_buffer or resource */
- assert(ssbo[2].buffer);
- si_launch_grid_internal_ssbos(sctx, &grid, sctx->sh_query_result_shader,
- SI_OP_SYNC_PS_BEFORE | SI_OP_SYNC_AFTER, SI_COHERENCY_SHADER,
- 3, ssbo, (1 << 2) | (ssbo[1].buffer ? 1 << 1 : 0));
-
- if (qbuf == query->last)
- break;
- qbuf = list_entry(qbuf->list.next, struct gfx10_sh_query_buffer, list);
- }
-
- si_restore_qbo_state(sctx, &saved_state);
- pipe_resource_reference(&tmp_buffer, NULL);
+ struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
+ struct si_qbo_state saved_state = {};
+ struct pipe_resource *tmp_buffer = NULL;
+ unsigned tmp_buffer_offset = 0;
+
+ if (!sctx->sh_query_result_shader) {
+ sctx->sh_query_result_shader = gfx10_create_sh_query_result_cs(sctx);
+ if (!sctx->sh_query_result_shader)
+ return;
+ }
+
+ if (query->first != query->last) {
+ u_suballocator_alloc(sctx->allocator_zeroed_memory, 16, 16,
+ &tmp_buffer_offset, &tmp_buffer);
+ if (!tmp_buffer)
+ return;
+ }
+
+ si_save_qbo_state(sctx, &saved_state);
+
+ /* Pre-fill the constants configuring the shader behavior. */
+ struct {
+ uint32_t config;
+ uint32_t offset;
+ uint32_t chain;
+ uint32_t result_count;
+ } consts;
+ struct pipe_constant_buffer constant_buffer = {};
+
+ if (index >= 0) {
+ switch (query->b.type) {
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ consts.offset = sizeof(uint32_t) * query->stream;
+ consts.config = 0;
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ consts.offset = sizeof(uint32_t) * (4 + query->stream);
+ consts.config = 0;
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ consts.offset = sizeof(uint32_t) * (4 * index + query->stream);
+ consts.config = 0;
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ consts.offset = sizeof(uint32_t) * query->stream;
+ consts.config = 2;
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ consts.offset = 0;
+ consts.config = 3;
+ break;
+ default: unreachable("bad query type");
+ }
+ } else {
+ /* Check result availability. */
+ consts.offset = 0;
+ consts.config = 1;
+ }
+
+ if (result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64)
+ consts.config |= 8;
+
+ constant_buffer.buffer_size = sizeof(consts);
+ constant_buffer.user_buffer = &consts;
+
+ /* Pre-fill the SSBOs and grid. */
+ struct pipe_shader_buffer ssbo[3];
+ struct pipe_grid_info grid = {};
+
+ ssbo[1].buffer = tmp_buffer;
+ ssbo[1].buffer_offset = tmp_buffer_offset;
+ ssbo[1].buffer_size = 16;
+
+ ssbo[2] = ssbo[1];
+
+ sctx->b.bind_compute_state(&sctx->b, sctx->sh_query_result_shader);
+
+ grid.block[0] = 1;
+ grid.block[1] = 1;
+ grid.block[2] = 1;
+ grid.grid[0] = 1;
+ grid.grid[1] = 1;
+ grid.grid[2] = 1;
+
+ struct gfx10_sh_query_buffer *qbuf = query->first;
+ for (;;) {
+ unsigned begin = qbuf == query->first ? query->first_begin : 0;
+ unsigned end = qbuf == query->last ? query->last_end : qbuf->buf->b.b.width0;
+ if (!end)
+ continue;
+
+ ssbo[0].buffer = &qbuf->buf->b.b;
+ ssbo[0].buffer_offset = begin;
+ ssbo[0].buffer_size = end - begin;
+
+ consts.result_count = (end - begin) / sizeof(struct gfx10_sh_query_buffer_mem);
+ consts.chain = 0;
+ if (qbuf != query->first)
+ consts.chain |= 1;
+ if (qbuf != query->last)
+ consts.chain |= 2;
+
+ if (qbuf == query->last) {
+ ssbo[2].buffer = resource;
+ ssbo[2].buffer_offset = offset;
+ ssbo[2].buffer_size = 8;
+ }
+
+ sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, &constant_buffer);
+ sctx->b.set_shader_buffers(&sctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo, 0x6);
+
+ if (wait) {
+ uint64_t va;
+
+ /* Wait for result availability. Wait only for readiness
+ * of the last entry, since the fence writes should be
+ * serialized in the CP.
+ */
+ va = qbuf->buf->gpu_address;
+ va += end - sizeof(struct gfx10_sh_query_buffer_mem);
+ va += offsetof(struct gfx10_sh_query_buffer_mem, fence);
+
+ si_cp_wait_mem(sctx, sctx->gfx_cs, va, 0x00000001, 0x00000001, 0);
+ }
+
+ sctx->b.launch_grid(&sctx->b, &grid);
+ sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
+
+ if (qbuf == query->last)
+ break;
+ qbuf = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.next, list);
+ }
+
+ si_restore_qbo_state(sctx, &saved_state);
+ pipe_resource_reference(&tmp_buffer, NULL);
}
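
For reference, the values this function feeds to the result shader, collected from the switch above (the enumerator names are illustrative, not from the source):

/* Illustrative summary of consts.config / consts.chain as set above:
 * config selects the shader's operation, chain describes the buffer's
 * position in a multi-buffer query. */
enum {
   SH_QUERY_CONFIG_COUNTER      = 0, /* copy one masked counter */
   SH_QUERY_CONFIG_AVAILABILITY = 1, /* index < 0: result availability */
   SH_QUERY_CONFIG_OVERFLOW     = 2, /* overflow predicate, one stream */
   SH_QUERY_CONFIG_OVERFLOW_ANY = 3, /* overflow predicate, any stream */
   SH_QUERY_CONFIG_64BIT        = 8, /* OR'd in for I64/U64 result types */

   SH_QUERY_CHAIN_NOT_FIRST = 1,     /* accumulate into the temp buffer */
   SH_QUERY_CHAIN_NOT_LAST  = 2,     /* more buffers follow */
};
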
static const struct si_query_ops gfx10_sh_query_ops = {
- .destroy = gfx10_sh_query_destroy,
- .begin = gfx10_sh_query_begin,
- .end = gfx10_sh_query_end,
- .get_result = gfx10_sh_query_get_result,
- .get_result_resource = gfx10_sh_query_get_result_resource,
+ .destroy = gfx10_sh_query_destroy,
+ .begin = gfx10_sh_query_begin,
+ .end = gfx10_sh_query_end,
+ .get_result = gfx10_sh_query_get_result,
+ .get_result_resource = gfx10_sh_query_get_result_resource,
};
-struct pipe_query *gfx10_sh_query_create(struct si_screen *screen, enum pipe_query_type query_type,
- unsigned index)
+struct pipe_query *gfx10_sh_query_create(struct si_screen *screen,
+ enum pipe_query_type query_type,
+ unsigned index)
{
- struct gfx10_sh_query *query = CALLOC_STRUCT(gfx10_sh_query);
- if (unlikely(!query))
- return NULL;
+ struct gfx10_sh_query *query = CALLOC_STRUCT(gfx10_sh_query);
+ if (unlikely(!query))
+ return NULL;
- query->b.ops = &gfx10_sh_query_ops;
- query->b.type = query_type;
- query->stream = index;
+ query->b.ops = &gfx10_sh_query_ops;
+ query->b.type = query_type;
+ query->stream = index;
- return (struct pipe_query *)query;
+ return (struct pipe_query *)query;
}
void gfx10_init_query(struct si_context *sctx)
{
- list_inithead(&sctx->shader_query_buffers);
- sctx->atoms.s.shader_query.emit = emit_shader_query;
+ LIST_INITHEAD(&sctx->shader_query_buffers);
+ sctx->atoms.s.shader_query.emit = emit_shader_query;
}
void gfx10_destroy_query(struct si_context *sctx)
{
- if (!sctx->shader_query_buffers.next)
- return;
-
- while (!list_is_empty(&sctx->shader_query_buffers)) {
- struct gfx10_sh_query_buffer *qbuf =
- list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
- list_del(&qbuf->list);
-
- assert(!qbuf->refcount);
- si_resource_reference(&qbuf->buf, NULL);
- FREE(qbuf);
- }
+ while (!LIST_IS_EMPTY(&sctx->shader_query_buffers)) {
+ struct gfx10_sh_query_buffer *qbuf =
+ list_first_entry(&sctx->shader_query_buffers,
+ struct gfx10_sh_query_buffer, list);
+ LIST_DEL(&qbuf->list);
+
+ assert(!qbuf->refcount);
+ si_resource_reference(&qbuf->buf, NULL);
+ FREE(qbuf);
+ }
}