summary | refs | log | tree | commit | diff
path: root/lib/mesa/src
diff options
context:
space:
mode:
author: Jonathan Gray <jsg@cvs.openbsd.org> 2020-08-26 05:29:31 +0000
committer: Jonathan Gray <jsg@cvs.openbsd.org> 2020-08-26 05:29:31 +0000
commit: b588b4f3eff82e42345c0b15670ab089b53f9cd6 (patch)
tree: 49350a8ad21d0a8b6f6b5313a33a3080ae81d821 /lib/mesa/src
parent: 2ebab484cac65c01dd19e8c1b62eb58c83074390 (diff)
Import Mesa 20.1.6
Diffstat (limited to 'lib/mesa/src')
-rw-r--r-- lib/mesa/src/amd/Android.compiler.mk 4
-rw-r--r-- lib/mesa/src/freedreno/vulkan/tu_wsi_x11.c 4
-rw-r--r-- lib/mesa/src/gallium/drivers/panfrost/Android.mk 3
-rw-r--r-- lib/mesa/src/gallium/drivers/panfrost/Makefile.sources 18
-rw-r--r-- lib/mesa/src/gallium/drivers/zink/zink_draw.c 678
-rw-r--r-- lib/mesa/src/intel/compiler/brw_gen_enum.h 53
-rw-r--r-- lib/mesa/src/intel/perf/gen_perf_private.h 9
-rw-r--r-- lib/mesa/src/intel/perf/gen_perf_query.c 264
-rw-r--r-- lib/mesa/src/intel/perf/gen_perf_query.h 10
-rw-r--r-- lib/mesa/src/intel/perf/gen_perf_regs.h 63
10 files changed, 367 insertions, 739 deletions
diff --git a/lib/mesa/src/amd/Android.compiler.mk b/lib/mesa/src/amd/Android.compiler.mk
index 7d4a1ea43..3f544ac3c 100644
--- a/lib/mesa/src/amd/Android.compiler.mk
+++ b/lib/mesa/src/amd/Android.compiler.mk
@@ -32,8 +32,10 @@ include $(CLEAR_VARS)
LOCAL_MODULE := libmesa_aco
+# filter-out compiler/aco_instruction_selection_setup.cpp because
+# it's already included by compiler/aco_instruction_selection.cpp
LOCAL_SRC_FILES := \
- $(ACO_FILES)
+ $(filter-out compiler/aco_instruction_selection_setup.cpp, $(ACO_FILES))
LOCAL_CFLAGS += -DFORCE_BUILD_AMDGPU # instructs LLVM to declare LLVMInitializeAMDGPU* functions
diff --git a/lib/mesa/src/freedreno/vulkan/tu_wsi_x11.c b/lib/mesa/src/freedreno/vulkan/tu_wsi_x11.c
index e6ce75e7f..180e504a0 100644
--- a/lib/mesa/src/freedreno/vulkan/tu_wsi_x11.c
+++ b/lib/mesa/src/freedreno/vulkan/tu_wsi_x11.c
@@ -75,7 +75,7 @@ VkResult tu_CreateXcbSurfaceKHR(
if (pAllocator)
alloc = pAllocator;
else
- alloc = &instance->vk.alloc;
+ alloc = &instance->alloc;
return wsi_create_xcb_surface(alloc, pCreateInfo, pSurface);
}
@@ -94,7 +94,7 @@ VkResult tu_CreateXlibSurfaceKHR(
if (pAllocator)
alloc = pAllocator;
else
- alloc = &instance->vk.alloc;
+ alloc = &instance->alloc;
return wsi_create_xlib_surface(alloc, pCreateInfo, pSurface);
}
diff --git a/lib/mesa/src/gallium/drivers/panfrost/Android.mk b/lib/mesa/src/gallium/drivers/panfrost/Android.mk
index 48c4d52c7..c7ad6e175 100644
--- a/lib/mesa/src/gallium/drivers/panfrost/Android.mk
+++ b/lib/mesa/src/gallium/drivers/panfrost/Android.mk
@@ -42,7 +42,8 @@ LOCAL_STATIC_LIBRARIES := \
libmesa_nir \
libmesa_winsys_panfrost \
libpanfrost_bifrost \
- libpanfrost_lib \
+ libpanfrost_decode \
+ libpanfrost_encoder \
libpanfrost_midgard \
libpanfrost_shared \
libpanfrost_util \
diff --git a/lib/mesa/src/gallium/drivers/panfrost/Makefile.sources b/lib/mesa/src/gallium/drivers/panfrost/Makefile.sources
index 470dfb31e..c734cd080 100644
--- a/lib/mesa/src/gallium/drivers/panfrost/Makefile.sources
+++ b/lib/mesa/src/gallium/drivers/panfrost/Makefile.sources
@@ -1,17 +1,31 @@
C_SOURCES := \
+ nir/nir_lower_blend.c \
+ nir/nir_lower_blend.h \
+ nir/nir_lower_framebuffer.c \
+ \
+ pan_allocate.c \
+ pan_allocate.h \
pan_assemble.c \
pan_blend_cso.c \
- pan_blend_cso.h \
+ pan_blend.h \
+ pan_blending.c \
+ pan_blending.h \
+ pan_blend_shaders.c \
+ pan_blend_shaders.h \
pan_blit.c \
pan_cmdstream.c \
pan_cmdstream.h \
pan_compute.c \
pan_context.c \
pan_context.h \
+ pan_fragment.c \
pan_job.c \
pan_job.h \
+ pan_mfbd.c \
pan_public.h \
pan_resource.c \
pan_resource.h \
+ pan_scoreboard.c \
pan_screen.c \
- pan_screen.h
+ pan_screen.h \
+ pan_sfbd.c \
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_draw.c b/lib/mesa/src/gallium/drivers/zink/zink_draw.c
index 8fc31b13d..553579acf 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_draw.c
+++ b/lib/mesa/src/gallium/drivers/zink/zink_draw.c
@@ -1,141 +1,39 @@
#include "zink_compiler.h"
#include "zink_context.h"
#include "zink_program.h"
-#include "zink_query.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_state.h"
-#include "zink_surface.h"
#include "indices/u_primconvert.h"
-#include "tgsi/tgsi_from_mesa.h"
#include "util/hash_table.h"
#include "util/u_debug.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_prim.h"
-#include "util/u_prim_restart.h"
-
-static void
-zink_emit_xfb_counter_barrier(struct zink_context *ctx)
-{
- /* Between the pause and resume there needs to be a memory barrier for the counter buffers
- * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
- * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
- * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
- * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT.
- *
- * - from VK_EXT_transform_feedback spec
- */
- for (unsigned i = 0; i < ctx->num_so_targets; i++) {
- struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
- if (!t)
- continue;
- struct zink_resource *res = zink_resource(t->counter_buffer);
- if (t->counter_buffer_valid)
- zink_resource_buffer_barrier(ctx, NULL, res, VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT,
- VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
- else
- zink_resource_buffer_barrier(ctx, NULL, res, VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT,
- VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT);
- }
- ctx->xfb_barrier = false;
-}
-
-static void
-zink_emit_xfb_vertex_input_barrier(struct zink_context *ctx, struct zink_resource *res)
+static VkDescriptorSet
+allocate_descriptor_set(struct zink_screen *screen,
+ struct zink_batch *batch,
+ struct zink_gfx_program *prog)
{
- /* A pipeline barrier is required between using the buffers as
- * transform feedback buffers and vertex buffers to
- * ensure all writes to the transform feedback buffers are visible
- * when the data is read as vertex attributes.
- * The source access is VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT
- * and the destination access is VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
- * for the pipeline stages VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
- * and VK_PIPELINE_STAGE_VERTEX_INPUT_BIT respectively.
- *
- * - 20.3.1. Drawing Transform Feedback
- */
- zink_resource_buffer_barrier(ctx, NULL, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
-}
-
-static void
-zink_emit_stream_output_targets(struct pipe_context *pctx)
-{
- struct zink_context *ctx = zink_context(pctx);
- struct zink_screen *screen = zink_screen(pctx->screen);
- struct zink_batch *batch = &ctx->batch;
- VkBuffer buffers[PIPE_MAX_SO_OUTPUTS] = {};
- VkDeviceSize buffer_offsets[PIPE_MAX_SO_OUTPUTS] = {};
- VkDeviceSize buffer_sizes[PIPE_MAX_SO_OUTPUTS] = {};
-
- for (unsigned i = 0; i < ctx->num_so_targets; i++) {
- struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
- if (!t) {
- /* no need to reference this or anything */
- buffers[i] = zink_resource(ctx->dummy_xfb_buffer)->obj->buffer;
- buffer_offsets[i] = 0;
- buffer_sizes[i] = sizeof(uint8_t);
- continue;
- }
- struct zink_resource *res = zink_resource(t->base.buffer);
- if (!(res->bind_history & ZINK_RESOURCE_USAGE_STREAMOUT))
- /* resource has been rebound */
- t->counter_buffer_valid = false;
- buffers[i] = res->obj->buffer;
- zink_resource_buffer_barrier(ctx, NULL, zink_resource(t->base.buffer),
- VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT);
- zink_batch_reference_resource_rw(batch, res, true);
- buffer_offsets[i] = t->base.buffer_offset;
- buffer_sizes[i] = t->base.buffer_size;
- res->bind_history |= ZINK_RESOURCE_USAGE_STREAMOUT;
- util_range_add(t->base.buffer, &res->valid_buffer_range, t->base.buffer_offset,
- t->base.buffer_offset + t->base.buffer_size);
+ assert(batch->descs_left >= prog->num_descriptors);
+ VkDescriptorSetAllocateInfo dsai;
+ memset((void *)&dsai, 0, sizeof(dsai));
+ dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+ dsai.pNext = NULL;
+ dsai.descriptorPool = batch->descpool;
+ dsai.descriptorSetCount = 1;
+ dsai.pSetLayouts = &prog->dsl;
+
+ VkDescriptorSet desc_set;
+ if (vkAllocateDescriptorSets(screen->dev, &dsai, &desc_set) != VK_SUCCESS) {
+ debug_printf("ZINK: failed to allocate descriptor set :/");
+ return VK_NULL_HANDLE;
}
- screen->vk_CmdBindTransformFeedbackBuffersEXT(batch->state->cmdbuf, 0, ctx->num_so_targets,
- buffers, buffer_offsets,
- buffer_sizes);
- ctx->dirty_so_targets = false;
-}
-
-static void
-barrier_vertex_buffers(struct zink_context *ctx)
-{
- const struct zink_vertex_elements_state *elems = ctx->element_state;
- for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
- struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ctx->element_state->binding_map[i];
- assert(vb);
- if (vb->buffer.resource) {
- struct zink_resource *res = zink_resource(vb->buffer.resource);
- zink_resource_buffer_barrier(ctx, NULL, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
- }
- }
-}
-
-static void
-check_buffer_barrier(struct zink_context *ctx, struct pipe_resource *pres, VkAccessFlags flags, VkPipelineStageFlags pipeline)
-{
- struct zink_resource *res = zink_resource(pres);
- zink_resource_buffer_barrier(ctx, NULL, res, flags, pipeline);
-}
-
-static void
-barrier_draw_buffers(struct zink_context *ctx, const struct pipe_draw_info *dinfo,
- const struct pipe_draw_indirect_info *dindirect, struct pipe_resource *index_buffer)
-{
- if (index_buffer)
- check_buffer_barrier(ctx, index_buffer, VK_ACCESS_INDEX_READ_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
- if (dindirect && dindirect->buffer) {
- check_buffer_barrier(ctx, dindirect->buffer,
- VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
- if (dindirect->indirect_draw_count)
- check_buffer_barrier(ctx, dindirect->indirect_draw_count,
- VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
- }
+ batch->descs_left -= prog->num_descriptors;
+ return desc_set;
}
static void
@@ -143,100 +41,43 @@ zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
{
VkBuffer buffers[PIPE_MAX_ATTRIBS];
VkDeviceSize buffer_offsets[PIPE_MAX_ATTRIBS];
- VkDeviceSize buffer_strides[PIPE_MAX_ATTRIBS];
const struct zink_vertex_elements_state *elems = ctx->element_state;
- struct zink_screen *screen = zink_screen(ctx->base.screen);
-
- if (!elems->hw_state.num_bindings)
- return;
-
for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
- struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ctx->element_state->binding_map[i];
+ struct pipe_vertex_buffer *vb = ctx->buffers + ctx->element_state->binding_map[i];
assert(vb);
if (vb->buffer.resource) {
struct zink_resource *res = zink_resource(vb->buffer.resource);
- buffers[i] = res->obj->buffer;
+ buffers[i] = res->buffer;
buffer_offsets[i] = vb->buffer_offset;
- buffer_strides[i] = vb->stride;
- zink_batch_reference_resource_rw(batch, res, false);
+ zink_batch_reference_resoure(batch, res);
} else {
- buffers[i] = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
+ buffers[i] = zink_resource(ctx->dummy_buffer)->buffer;
buffer_offsets[i] = 0;
- buffer_strides[i] = 0;
}
}
- if (screen->info.have_EXT_extended_dynamic_state)
- screen->vk_CmdBindVertexBuffers2EXT(batch->state->cmdbuf, 0,
- elems->hw_state.num_bindings,
- buffers, buffer_offsets, NULL, buffer_strides);
- else
- vkCmdBindVertexBuffers(batch->state->cmdbuf, 0,
+ if (elems->hw_state.num_bindings > 0)
+ vkCmdBindVertexBuffers(batch->cmdbuf, 0,
elems->hw_state.num_bindings,
buffers, buffer_offsets);
}
-static struct zink_compute_program *
-get_compute_program(struct zink_context *ctx)
-{
- unsigned bits = 1 << PIPE_SHADER_COMPUTE;
- ctx->dirty_shader_stages |= ctx->inlinable_uniforms_dirty_mask &
- ctx->inlinable_uniforms_valid_mask &
- ctx->shader_has_inlinable_uniforms_mask & bits;
- if (ctx->dirty_shader_stages & bits) {
- struct hash_entry *entry = _mesa_hash_table_search(ctx->compute_program_cache,
- &ctx->compute_stage->shader_id);
- if (!entry) {
- struct zink_compute_program *comp;
- comp = zink_create_compute_program(ctx, ctx->compute_stage);
- entry = _mesa_hash_table_insert(ctx->compute_program_cache, &comp->shader->shader_id, comp);
- if (!entry)
- return NULL;
- }
- if (entry->data != ctx->curr_compute)
- ctx->compute_pipeline_state.dirty = true;
- ctx->curr_compute = entry->data;
- ctx->dirty_shader_stages &= bits;
- ctx->inlinable_uniforms_dirty_mask &= bits;
- }
-
- assert(ctx->curr_compute);
- return ctx->curr_compute;
-}
-
static struct zink_gfx_program *
get_gfx_program(struct zink_context *ctx)
{
- if (ctx->last_vertex_stage_dirty) {
- if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
- ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_GEOMETRY);
- else if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
- ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_TESS_EVAL);
- else
- ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_VERTEX);
- ctx->last_vertex_stage_dirty = false;
- }
- unsigned bits = u_bit_consecutive(PIPE_SHADER_VERTEX, 5);
- ctx->dirty_shader_stages |= ctx->inlinable_uniforms_dirty_mask &
- ctx->inlinable_uniforms_valid_mask &
- ctx->shader_has_inlinable_uniforms_mask & bits;
- if (ctx->dirty_shader_stages & bits) {
+ if (ctx->dirty_program) {
struct hash_entry *entry = _mesa_hash_table_search(ctx->program_cache,
ctx->gfx_stages);
- if (entry)
- zink_update_gfx_program(ctx, entry->data);
- else {
+ if (!entry) {
struct zink_gfx_program *prog;
- prog = zink_create_gfx_program(ctx, ctx->gfx_stages);
- entry = _mesa_hash_table_insert(ctx->program_cache, prog->shaders, prog);
+ prog = zink_create_gfx_program(zink_screen(ctx->base.screen),
+ ctx->gfx_stages);
+ entry = _mesa_hash_table_insert(ctx->program_cache, prog->stages, prog);
if (!entry)
return NULL;
}
- if (ctx->curr_program != entry->data)
- ctx->gfx_pipeline_state.combined_dirty = true;
ctx->curr_program = entry->data;
- ctx->dirty_shader_stages &= ~bits;
- ctx->inlinable_uniforms_dirty_mask &= ~bits;
+ ctx->dirty_program = false;
}
assert(ctx->curr_program);
@@ -262,92 +103,32 @@ line_width_needed(enum pipe_prim_type reduced_prim,
}
}
-static inline bool
-restart_supported(enum pipe_prim_type mode)
-{
- return mode == PIPE_PRIM_LINE_STRIP || mode == PIPE_PRIM_TRIANGLE_STRIP || mode == PIPE_PRIM_TRIANGLE_FAN;
-}
-
-static void
-update_drawid(struct zink_context *ctx, unsigned draw_id)
-{
- struct zink_batch *batch = &ctx->batch;
- if (ctx->drawid_broken) {
- vkCmdPushConstants(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_VERTEX_BIT,
- offsetof(struct zink_gfx_push_constant, draw_id), sizeof(unsigned),
- &draw_id);
- }
-}
-
void
zink_draw_vbo(struct pipe_context *pctx,
- const struct pipe_draw_info *dinfo,
- const struct pipe_draw_indirect_info *dindirect,
- const struct pipe_draw_start_count *draws,
- unsigned num_draws)
+ const struct pipe_draw_info *dinfo)
{
- if (!dindirect && (!draws[0].count || !dinfo->instance_count))
- return;
-
struct zink_context *ctx = zink_context(pctx);
struct zink_screen *screen = zink_screen(pctx->screen);
struct zink_rasterizer_state *rast_state = ctx->rast_state;
- struct zink_depth_stencil_alpha_state *dsa_state = ctx->dsa_state;
- struct zink_so_target *so_target =
- dindirect && dindirect->count_from_stream_output ?
- zink_so_target(dindirect->count_from_stream_output) : NULL;
- VkBuffer counter_buffers[PIPE_MAX_SO_OUTPUTS];
- VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_OUTPUTS];
- bool need_index_buffer_unref = false;
-
- /* check memory usage and flush/stall as needed to avoid oom */
- zink_maybe_flush_or_stall(ctx);
-
- if (dinfo->primitive_restart && !restart_supported(dinfo->mode)) {
- util_draw_vbo_without_prim_restart(pctx, dinfo, dindirect, &draws[0]);
- return;
- }
- if (dinfo->mode == PIPE_PRIM_QUADS ||
- dinfo->mode == PIPE_PRIM_QUAD_STRIP ||
- dinfo->mode == PIPE_PRIM_POLYGON ||
- (dinfo->mode == PIPE_PRIM_TRIANGLE_FAN && !screen->have_triangle_fans) ||
- dinfo->mode == PIPE_PRIM_LINE_LOOP) {
+
+ if (dinfo->mode >= PIPE_PRIM_QUADS ||
+ dinfo->mode == PIPE_PRIM_LINE_LOOP ||
+ dinfo->index_size == 1) {
+ if (!u_trim_pipe_prim(dinfo->mode, (unsigned *)&dinfo->count))
+ return;
+
util_primconvert_save_rasterizer_state(ctx->primconvert, &rast_state->base);
- util_primconvert_draw_vbo(ctx->primconvert, dinfo, dindirect, draws, num_draws);
+ util_primconvert_draw_vbo(ctx->primconvert, dinfo);
return;
}
- if (ctx->gfx_pipeline_state.vertices_per_patch != dinfo->vertices_per_patch)
- ctx->gfx_pipeline_state.dirty = true;
- bool drawid_broken = ctx->drawid_broken;
- ctx->drawid_broken = BITSET_TEST(ctx->gfx_stages[PIPE_SHADER_VERTEX]->nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID) &&
- (!dindirect || !dindirect->buffer);
- if (drawid_broken != ctx->drawid_broken)
- ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_VERTEX);
- ctx->gfx_pipeline_state.vertices_per_patch = dinfo->vertices_per_patch;
- if (ctx->rast_state->base.point_quad_rasterization &&
- ctx->gfx_prim_mode != dinfo->mode) {
- if (ctx->gfx_prim_mode == PIPE_PRIM_POINTS || dinfo->mode == PIPE_PRIM_POINTS)
- ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_FRAGMENT);
- }
- ctx->gfx_prim_mode = dinfo->mode;
+
struct zink_gfx_program *gfx_program = get_gfx_program(ctx);
if (!gfx_program)
return;
- if (ctx->gfx_pipeline_state.primitive_restart != !!dinfo->primitive_restart)
- ctx->gfx_pipeline_state.dirty = true;
- ctx->gfx_pipeline_state.primitive_restart = !!dinfo->primitive_restart;
-
- if (!zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state) {
- for (unsigned i = 0; i < ctx->element_state->hw_state.num_bindings; i++) {
- unsigned binding = ctx->element_state->binding_map[i];
- const struct pipe_vertex_buffer *vb = ctx->vertex_buffers + binding;
- if (ctx->gfx_pipeline_state.bindings[i].stride != vb->stride) {
- ctx->gfx_pipeline_state.bindings[i].stride = vb->stride;
- ctx->gfx_pipeline_state.dirty = true;
- }
- }
- }
+ VkPipeline pipeline = zink_get_gfx_pipeline(screen, gfx_program,
+ &ctx->gfx_pipeline_state,
+ dinfo->mode);
enum pipe_prim_type reduced_prim = u_reduced_prim(dinfo->mode);
@@ -372,285 +153,160 @@ zink_draw_vbo(struct pipe_context *pctx,
unsigned index_offset = 0;
struct pipe_resource *index_buffer = NULL;
if (dinfo->index_size > 0) {
- uint32_t restart_index = util_prim_restart_index_from_size(dinfo->index_size);
- if ((dinfo->primitive_restart && (dinfo->restart_index != restart_index)) ||
- (!screen->info.have_EXT_index_type_uint8 && dinfo->index_size == 1)) {
- util_translate_prim_restart_ib(pctx, dinfo, dindirect, &draws[0], &index_buffer);
- need_index_buffer_unref = true;
- } else {
- if (dinfo->has_user_indices) {
- if (!util_upload_index_buffer(pctx, dinfo, &draws[0], &index_buffer, &index_offset, 4)) {
- debug_printf("util_upload_index_buffer() failed\n");
- return;
- }
- } else
- index_buffer = dinfo->index.resource;
- }
+ if (dinfo->has_user_indices) {
+ if (!util_upload_index_buffer(pctx, dinfo, &index_buffer, &index_offset, 4)) {
+ debug_printf("util_upload_index_buffer() failed\n");
+ return;
+ }
+ } else
+ index_buffer = dinfo->index.resource;
}
- if (ctx->xfb_barrier)
- zink_emit_xfb_counter_barrier(ctx);
-
- if (ctx->dirty_so_targets && ctx->num_so_targets)
- zink_emit_stream_output_targets(pctx);
- if (so_target)
- zink_emit_xfb_vertex_input_barrier(ctx, zink_resource(so_target->base.buffer));
+ VkWriteDescriptorSet wds[PIPE_SHADER_TYPES * PIPE_MAX_CONSTANT_BUFFERS + PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
+ VkDescriptorBufferInfo buffer_infos[PIPE_SHADER_TYPES * PIPE_MAX_CONSTANT_BUFFERS];
+ VkDescriptorImageInfo image_infos[PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
+ int num_wds = 0, num_buffer_info = 0, num_image_info = 0;
- barrier_vertex_buffers(ctx);
- barrier_draw_buffers(ctx, dinfo, dindirect, index_buffer);
+ struct zink_resource *transitions[PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
+ int num_transitions = 0;
- for (int i = 0; i < ZINK_SHADER_COUNT; i++) {
+ for (int i = 0; i < ARRAY_SIZE(ctx->gfx_stages); i++) {
struct zink_shader *shader = ctx->gfx_stages[i];
if (!shader)
continue;
- enum pipe_shader_type stage = pipe_shader_type_from_mesa(shader->nir->info.stage);
- if (ctx->num_so_targets &&
- (stage == PIPE_SHADER_GEOMETRY ||
- (stage == PIPE_SHADER_TESS_EVAL && !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]) ||
- (stage == PIPE_SHADER_VERTEX && !ctx->gfx_stages[PIPE_SHADER_GEOMETRY] && !ctx->gfx_stages[PIPE_SHADER_TESS_EVAL]))) {
- for (unsigned j = 0; j < ctx->num_so_targets; j++) {
- struct zink_so_target *t = zink_so_target(ctx->so_targets[j]);
- if (t)
- t->stride = shader->streamout.so_info.stride[j] * sizeof(uint32_t);
+
+ for (int j = 0; j < shader->num_bindings; j++) {
+ int index = shader->bindings[j].index;
+ if (shader->bindings[j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
+ assert(ctx->ubos[i][index].buffer_size > 0);
+ assert(ctx->ubos[i][index].buffer_size <= screen->props.limits.maxUniformBufferRange);
+ assert(ctx->ubos[i][index].buffer);
+ struct zink_resource *res = zink_resource(ctx->ubos[i][index].buffer);
+ buffer_infos[num_buffer_info].buffer = res->buffer;
+ buffer_infos[num_buffer_info].offset = ctx->ubos[i][index].buffer_offset;
+ buffer_infos[num_buffer_info].range = ctx->ubos[i][index].buffer_size;
+ wds[num_wds].pBufferInfo = buffer_infos + num_buffer_info;
+ ++num_buffer_info;
+ } else {
+ struct pipe_sampler_view *psampler_view = ctx->image_views[i][index];
+ assert(psampler_view);
+ struct zink_sampler_view *sampler_view = zink_sampler_view(psampler_view);
+
+ struct zink_resource *res = zink_resource(psampler_view->texture);
+ VkImageLayout layout = res->layout;
+ if (layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL &&
+ layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL &&
+ layout != VK_IMAGE_LAYOUT_GENERAL) {
+ transitions[num_transitions++] = res;
+ layout = VK_IMAGE_LAYOUT_GENERAL;
+ }
+ image_infos[num_image_info].imageLayout = layout;
+ image_infos[num_image_info].imageView = sampler_view->image_view;
+ image_infos[num_image_info].sampler = ctx->samplers[i][index];
+ wds[num_wds].pImageInfo = image_infos + num_image_info;
+ ++num_image_info;
}
- }
- }
- if (zink_program_has_descriptors(&gfx_program->base))
- zink_descriptors_update(ctx, screen, false);
-
- struct zink_batch *batch = zink_batch_rp(ctx);
- VkViewport viewports[PIPE_MAX_VIEWPORTS];
- for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
- VkViewport viewport = {
- ctx->vp_state.viewport_states[i].translate[0] - ctx->vp_state.viewport_states[i].scale[0],
- ctx->vp_state.viewport_states[i].translate[1] - ctx->vp_state.viewport_states[i].scale[1],
- ctx->vp_state.viewport_states[i].scale[0] * 2,
- ctx->vp_state.viewport_states[i].scale[1] * 2,
- ctx->rast_state->base.clip_halfz ?
- ctx->vp_state.viewport_states[i].translate[2] :
- ctx->vp_state.viewport_states[i].translate[2] - ctx->vp_state.viewport_states[i].scale[2],
- ctx->vp_state.viewport_states[i].translate[2] + ctx->vp_state.viewport_states[i].scale[2]
- };
- viewports[i] = viewport;
- }
- if (screen->info.have_EXT_extended_dynamic_state)
- screen->vk_CmdSetViewportWithCountEXT(batch->state->cmdbuf, ctx->vp_state.num_viewports, viewports);
- else
- vkCmdSetViewport(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, viewports);
- VkRect2D scissors[PIPE_MAX_VIEWPORTS];
- if (ctx->rast_state->base.scissor) {
- for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
- scissors[i].offset.x = ctx->vp_state.scissor_states[i].minx;
- scissors[i].offset.y = ctx->vp_state.scissor_states[i].miny;
- scissors[i].extent.width = ctx->vp_state.scissor_states[i].maxx - ctx->vp_state.scissor_states[i].minx;
- scissors[i].extent.height = ctx->vp_state.scissor_states[i].maxy - ctx->vp_state.scissor_states[i].miny;
- }
- } else {
- for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
- scissors[i].offset.x = 0;
- scissors[i].offset.y = 0;
- scissors[i].extent.width = ctx->fb_state.width;
- scissors[i].extent.height = ctx->fb_state.height;
+ wds[num_wds].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+ wds[num_wds].pNext = NULL;
+ wds[num_wds].dstBinding = shader->bindings[j].binding;
+ wds[num_wds].dstArrayElement = 0;
+ wds[num_wds].descriptorCount = 1;
+ wds[num_wds].descriptorType = shader->bindings[j].type;
+ ++num_wds;
}
}
- if (screen->info.have_EXT_extended_dynamic_state)
- screen->vk_CmdSetScissorWithCountEXT(batch->state->cmdbuf, ctx->vp_state.num_viewports, scissors);
- else
- vkCmdSetScissor(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, scissors);
- if (line_width_needed(reduced_prim, rast_state->hw_state.polygon_mode)) {
- if (screen->info.feats.features.wideLines || ctx->line_width == 1.0f)
- vkCmdSetLineWidth(batch->state->cmdbuf, ctx->line_width);
- else
- debug_printf("BUG: wide lines not supported, needs fallback!");
- }
+ struct zink_batch *batch;
+ if (num_transitions > 0) {
+ batch = zink_batch_no_rp(ctx);
- if (dsa_state->base.stencil[0].enabled) {
- if (dsa_state->base.stencil[1].enabled) {
- vkCmdSetStencilReference(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT,
- ctx->stencil_ref.ref_value[0]);
- vkCmdSetStencilReference(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT,
- ctx->stencil_ref.ref_value[1]);
- } else
- vkCmdSetStencilReference(batch->state->cmdbuf,
- VK_STENCIL_FACE_FRONT_AND_BACK,
- ctx->stencil_ref.ref_value[0]);
+ for (int i = 0; i < num_transitions; ++i)
+ zink_resource_barrier(batch->cmdbuf, transitions[i],
+ transitions[i]->aspect,
+ VK_IMAGE_LAYOUT_GENERAL);
}
- if (depth_bias)
- vkCmdSetDepthBias(batch->state->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale);
- else
- vkCmdSetDepthBias(batch->state->cmdbuf, 0.0f, 0.0f, 0.0f);
+ batch = zink_batch_rp(ctx);
- if (ctx->gfx_pipeline_state.blend_state->need_blend_constants)
- vkCmdSetBlendConstants(batch->state->cmdbuf, ctx->blend_constants);
+ if (batch->descs_left < gfx_program->num_descriptors) {
+ ctx->base.flush(&ctx->base, NULL, 0);
+ batch = zink_batch_rp(ctx);
+ assert(batch->descs_left >= gfx_program->num_descriptors);
+ }
+ VkDescriptorSet desc_set = allocate_descriptor_set(screen, batch,
+ gfx_program);
+ assert(desc_set != VK_NULL_HANDLE);
- VkPipeline pipeline = zink_get_gfx_pipeline(screen, gfx_program,
- &ctx->gfx_pipeline_state,
- dinfo->mode);
- vkCmdBindPipeline(batch->state->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
-
- zink_bind_vertex_buffers(batch, ctx);
+ for (int i = 0; i < ARRAY_SIZE(ctx->gfx_stages); i++) {
+ struct zink_shader *shader = ctx->gfx_stages[i];
+ if (!shader)
+ continue;
- if (BITSET_TEST(ctx->gfx_stages[PIPE_SHADER_VERTEX]->nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX)) {
- unsigned draw_mode_is_indexed = dinfo->index_size > 0;
- vkCmdPushConstants(batch->state->cmdbuf, gfx_program->base.layout, VK_SHADER_STAGE_VERTEX_BIT,
- offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed), sizeof(unsigned),
- &draw_mode_is_indexed);
- }
- if (gfx_program->shaders[PIPE_SHADER_TESS_CTRL] && gfx_program->shaders[PIPE_SHADER_TESS_CTRL]->is_generated)
- vkCmdPushConstants(batch->state->cmdbuf, gfx_program->base.layout, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
- offsetof(struct zink_gfx_push_constant, default_inner_level), sizeof(float) * 6,
- &ctx->tess_levels[0]);
-
- zink_query_update_gs_states(ctx);
-
- if (ctx->num_so_targets) {
- for (unsigned i = 0; i < ctx->num_so_targets; i++) {
- struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
- counter_buffers[i] = VK_NULL_HANDLE;
- if (t) {
- struct zink_resource *res = zink_resource(t->counter_buffer);
- zink_batch_reference_resource_rw(batch, res, true);
- if (t->counter_buffer_valid) {
- counter_buffers[i] = res->obj->buffer;
- counter_buffer_offsets[i] = t->counter_buffer_offset;
- }
+ for (int j = 0; j < shader->num_bindings; j++) {
+ int index = shader->bindings[j].index;
+ if (shader->bindings[j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
+ struct zink_resource *res = zink_resource(ctx->ubos[i][index].buffer);
+ zink_batch_reference_resoure(batch, res);
+ } else {
+ struct zink_sampler_view *sampler_view = zink_sampler_view(ctx->image_views[i][index]);
+ zink_batch_reference_sampler_view(batch, sampler_view);
}
}
- screen->vk_CmdBeginTransformFeedbackEXT(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
}
- unsigned draw_id = dinfo->drawid;
- if (dinfo->index_size > 0) {
- VkIndexType index_type;
- unsigned index_size = dinfo->index_size;
- if (need_index_buffer_unref)
- /* index buffer will have been promoted from uint8 to uint16 in this case */
- index_size = MAX2(index_size, 2);
- switch (index_size) {
- case 1:
- assert(screen->info.have_EXT_index_type_uint8);
- index_type = VK_INDEX_TYPE_UINT8_EXT;
- break;
- case 2:
- index_type = VK_INDEX_TYPE_UINT16;
- break;
- case 4:
- index_type = VK_INDEX_TYPE_UINT32;
- break;
- default:
- unreachable("unknown index size!");
- }
- struct zink_resource *res = zink_resource(index_buffer);
- vkCmdBindIndexBuffer(batch->state->cmdbuf, res->obj->buffer, index_offset, index_type);
- zink_batch_reference_resource_rw(batch, res, false);
- if (dindirect && dindirect->buffer) {
- assert(num_draws == 1);
- update_drawid(ctx, draw_id);
- struct zink_resource *indirect = zink_resource(dindirect->buffer);
- zink_batch_reference_resource_rw(batch, indirect, false);
- if (dindirect->indirect_draw_count) {
- struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count);
- zink_batch_reference_resource_rw(batch, indirect_draw_count, false);
- screen->vk_CmdDrawIndexedIndirectCount(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset,
- indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset,
- dindirect->draw_count, dindirect->stride);
- } else
- vkCmdDrawIndexedIndirect(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
- } else {
- for (unsigned i = 0; i < num_draws; i++) {
- update_drawid(ctx, draw_id);
- vkCmdDrawIndexed(batch->state->cmdbuf,
- draws[i].count, dinfo->instance_count,
- need_index_buffer_unref ? 0 : draws[i].start, dinfo->index_bias, dinfo->start_instance);
- if (dinfo->increment_draw_id)
- draw_id++;
- }
- }
- } else {
- if (so_target && screen->info.tf_props.transformFeedbackDraw) {
- update_drawid(ctx, draw_id);
- zink_batch_reference_resource_rw(batch, zink_resource(so_target->base.buffer), false);
- zink_batch_reference_resource_rw(batch, zink_resource(so_target->counter_buffer), true);
- screen->vk_CmdDrawIndirectByteCountEXT(batch->state->cmdbuf, dinfo->instance_count, dinfo->start_instance,
- zink_resource(so_target->counter_buffer)->obj->buffer, so_target->counter_buffer_offset, 0,
- MIN2(so_target->stride, screen->info.tf_props.maxTransformFeedbackBufferDataStride));
- } else if (dindirect && dindirect->buffer) {
- assert(num_draws == 1);
- update_drawid(ctx, draw_id);
- struct zink_resource *indirect = zink_resource(dindirect->buffer);
- zink_batch_reference_resource_rw(batch, indirect, false);
- if (dindirect->indirect_draw_count) {
- struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count);
- zink_batch_reference_resource_rw(batch, indirect_draw_count, false);
- screen->vk_CmdDrawIndirectCount(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset,
- indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset,
- dindirect->draw_count, dindirect->stride);
- } else
- vkCmdDrawIndirect(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
- } else {
- for (unsigned i = 0; i < num_draws; i++) {
- update_drawid(ctx, draw_id);
- vkCmdDraw(batch->state->cmdbuf, draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance);
- if (dinfo->increment_draw_id)
- draw_id++;
- }
- }
+ vkCmdSetViewport(batch->cmdbuf, 0, ctx->num_viewports, ctx->viewports);
+ if (ctx->rast_state->base.scissor)
+ vkCmdSetScissor(batch->cmdbuf, 0, ctx->num_viewports, ctx->scissors);
+ else if (ctx->fb_state.width && ctx->fb_state.height) {
+ VkRect2D fb_scissor = {};
+ fb_scissor.extent.width = ctx->fb_state.width;
+ fb_scissor.extent.height = ctx->fb_state.height;
+ vkCmdSetScissor(batch->cmdbuf, 0, 1, &fb_scissor);
}
- if (dinfo->index_size > 0 && (dinfo->has_user_indices || need_index_buffer_unref))
- pipe_resource_reference(&index_buffer, NULL);
-
- if (ctx->num_so_targets) {
- for (unsigned i = 0; i < ctx->num_so_targets; i++) {
- struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
- if (t) {
- counter_buffers[i] = zink_resource(t->counter_buffer)->obj->buffer;
- counter_buffer_offsets[i] = t->counter_buffer_offset;
- t->counter_buffer_valid = true;
- }
- }
- screen->vk_CmdEndTransformFeedbackEXT(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
+ if (line_width_needed(reduced_prim, rast_state->hw_state.polygon_mode)) {
+ if (screen->feats.wideLines || ctx->line_width == 1.0f)
+ vkCmdSetLineWidth(batch->cmdbuf, ctx->line_width);
+ else
+ debug_printf("BUG: wide lines not supported, needs fallback!");
}
- batch->has_work = true;
-}
-void
-zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
-{
- struct zink_context *ctx = zink_context(pctx);
- struct zink_screen *screen = zink_screen(pctx->screen);
- struct zink_batch *batch = &ctx->batch;
-
- /* check memory usage and flush/stall as needed to avoid oom */
- zink_maybe_flush_or_stall(ctx);
+ vkCmdSetStencilReference(batch->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, ctx->stencil_ref.ref_value[0]);
+ vkCmdSetStencilReference(batch->cmdbuf, VK_STENCIL_FACE_BACK_BIT, ctx->stencil_ref.ref_value[1]);
- struct zink_compute_program *comp_program = get_compute_program(ctx);
- if (!comp_program)
- return;
-
- zink_program_update_compute_pipeline_state(ctx, comp_program, info->block);
- VkPipeline pipeline = zink_get_compute_pipeline(screen, comp_program,
- &ctx->compute_pipeline_state);
-
- if (zink_program_has_descriptors(&comp_program->base))
- zink_descriptors_update(ctx, screen, true);
+ if (depth_bias)
+ vkCmdSetDepthBias(batch->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale);
+ else
+ vkCmdSetDepthBias(batch->cmdbuf, 0.0f, 0.0f, 0.0f);
+ if (ctx->gfx_pipeline_state.blend_state->need_blend_constants)
+ vkCmdSetBlendConstants(batch->cmdbuf, ctx->blend_constants);
- vkCmdBindPipeline(batch->state->cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
+ if (num_wds > 0) {
+ for (int i = 0; i < num_wds; ++i)
+ wds[i].dstSet = desc_set;
+ vkUpdateDescriptorSets(screen->dev, num_wds, wds, 0, NULL);
+ }
- if (BITSET_TEST(comp_program->shader->nir->info.system_values_read, SYSTEM_VALUE_WORK_DIM))
- vkCmdPushConstants(batch->state->cmdbuf, comp_program->base.layout, VK_SHADER_STAGE_COMPUTE_BIT,
- offsetof(struct zink_cs_push_constant, work_dim), sizeof(uint32_t),
- &info->work_dim);
+ vkCmdBindPipeline(batch->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ vkCmdBindDescriptorSets(batch->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ gfx_program->layout, 0, 1, &desc_set, 0, NULL);
+ zink_bind_vertex_buffers(batch, ctx);
- if (info->indirect) {
- vkCmdDispatchIndirect(batch->state->cmdbuf, zink_resource(info->indirect)->obj->buffer, info->indirect_offset);
- zink_batch_reference_resource_rw(batch, zink_resource(info->indirect), false);
+ if (dinfo->index_size > 0) {
+ assert(dinfo->index_size != 1);
+ VkIndexType index_type = dinfo->index_size == 2 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
+ struct zink_resource *res = zink_resource(index_buffer);
+ vkCmdBindIndexBuffer(batch->cmdbuf, res->buffer, index_offset, index_type);
+ zink_batch_reference_resoure(batch, res);
+ vkCmdDrawIndexed(batch->cmdbuf,
+ dinfo->count, dinfo->instance_count,
+ dinfo->start, dinfo->index_bias, dinfo->start_instance);
} else
- vkCmdDispatch(batch->state->cmdbuf, info->grid[0], info->grid[1], info->grid[2]);
- batch->has_work = true;
+ vkCmdDraw(batch->cmdbuf, dinfo->count, dinfo->instance_count, dinfo->start, dinfo->start_instance);
+
+ if (dinfo->index_size > 0 && dinfo->has_user_indices)
+ pipe_resource_reference(&index_buffer, NULL);
}
diff --git a/lib/mesa/src/intel/compiler/brw_gen_enum.h b/lib/mesa/src/intel/compiler/brw_gen_enum.h
index 9af169b93..cd50f9c6f 100644
--- a/lib/mesa/src/intel/compiler/brw_gen_enum.h
+++ b/lib/mesa/src/intel/compiler/brw_gen_enum.h
@@ -25,40 +25,37 @@
#include "dev/gen_device_info.h"
enum gen {
- GFX4 = (1 << 0),
- GFX45 = (1 << 1),
- GFX5 = (1 << 2),
- GFX6 = (1 << 3),
- GFX7 = (1 << 4),
- GFX75 = (1 << 5),
- GFX8 = (1 << 6),
- GFX9 = (1 << 7),
- GFX10 = (1 << 8),
- GFX11 = (1 << 9),
- GFX12 = (1 << 10),
- GFX125 = (1 << 11),
- GFX_ALL = ~0
+ GEN4 = (1 << 0),
+ GEN45 = (1 << 1),
+ GEN5 = (1 << 2),
+ GEN6 = (1 << 3),
+ GEN7 = (1 << 4),
+ GEN75 = (1 << 5),
+ GEN8 = (1 << 6),
+ GEN9 = (1 << 7),
+ GEN10 = (1 << 8),
+ GEN11 = (1 << 9),
+ GEN12 = (1 << 10),
+ GEN_ALL = ~0
};
-#define GFX_LT(gen) ((gen) - 1)
-#define GFX_GE(gen) (~GFX_LT(gen))
-#define GFX_LE(gen) (GFX_LT(gen) | (gen))
+#define GEN_LT(gen) ((gen) - 1)
+#define GEN_GE(gen) (~GEN_LT(gen))
+#define GEN_LE(gen) (GEN_LT(gen) | (gen))
static enum gen
gen_from_devinfo(const struct gen_device_info *devinfo)
{
- switch (devinfo->verx10) {
- case 40: return GFX4;
- case 45: return GFX45;
- case 50: return GFX5;
- case 60: return GFX6;
- case 70: return GFX7;
- case 75: return GFX75;
- case 80: return GFX8;
- case 90: return GFX9;
- case 110: return GFX11;
- case 120: return GFX12;
- case 125: return GFX125;
+ switch (devinfo->gen) {
+ case 4: return devinfo->is_g4x ? GEN45 : GEN4;
+ case 5: return GEN5;
+ case 6: return GEN6;
+ case 7: return devinfo->is_haswell ? GEN75 : GEN7;
+ case 8: return GEN8;
+ case 9: return GEN9;
+ case 10: return GEN10;
+ case 11: return GEN11;
+ case 12: return GEN12;
default:
unreachable("not reached");
}
diff --git a/lib/mesa/src/intel/perf/gen_perf_private.h b/lib/mesa/src/intel/perf/gen_perf_private.h
index acca9a2b4..ac222d537 100644
--- a/lib/mesa/src/intel/perf/gen_perf_private.h
+++ b/lib/mesa/src/intel/perf/gen_perf_private.h
@@ -31,11 +31,6 @@ static inline uint64_t to_user_pointer(void *ptr)
return (uintptr_t) ptr;
}
-static inline uint64_t to_const_user_pointer(const void *ptr)
-{
- return (uintptr_t) ptr;
-}
-
static inline void
gen_perf_query_add_stat_reg(struct gen_perf_query_info *query, uint32_t reg,
uint32_t numerator, uint32_t denominator,
@@ -46,7 +41,7 @@ gen_perf_query_add_stat_reg(struct gen_perf_query_info *query, uint32_t reg,
assert(query->n_counters < query->max_counters);
counter = &query->counters[query->n_counters];
- counter->name = counter->symbol_name = name;
+ counter->name = name;
counter->desc = description;
counter->type = GEN_PERF_COUNTER_TYPE_RAW;
counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64;
@@ -76,8 +71,6 @@ gen_perf_append_query_info(struct gen_perf_config *perf, int max_counters)
query = &perf->queries[perf->n_queries - 1];
memset(query, 0, sizeof(*query));
- query->perf = perf;
-
if (max_counters > 0) {
query->max_counters = max_counters;
query->counters =
diff --git a/lib/mesa/src/intel/perf/gen_perf_query.c b/lib/mesa/src/intel/perf/gen_perf_query.c
index 10f9b9709..b9744913b 100644
--- a/lib/mesa/src/intel/perf/gen_perf_query.c
+++ b/lib/mesa/src/intel/perf/gen_perf_query.c
@@ -23,7 +23,7 @@
#include <unistd.h>
-#include "common/intel_gem.h"
+#include "common/gen_gem.h"
#include "dev/gen_debug.h"
#include "dev/gen_device_info.h"
@@ -36,29 +36,13 @@
#include "drm-uapi/i915_drm.h"
-#include "util/compiler.h"
#include "util/u_math.h"
#define FILE_DEBUG_FLAG DEBUG_PERFMON
-
-#define MI_RPC_BO_SIZE (4096)
-#define MI_FREQ_OFFSET_BYTES (256)
-#define MI_PERF_COUNTERS_OFFSET_BYTES (260)
-
-#define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1))
-
-/* Align to 64bytes, requirement for OA report write address. */
-#define TOTAL_QUERY_DATA_SIZE \
- ALIGN(256 /* OA report */ + \
- 4 /* freq register */ + \
- 8 + 8 /* perf counter 1 & 2 */, \
- 64)
-
-
-static uint32_t field_offset(bool end, uint32_t offset)
-{
- return (end ? TOTAL_QUERY_DATA_SIZE : 0) + offset;
-}
+#define MI_RPC_BO_SIZE 4096
+#define MI_FREQ_START_OFFSET_BYTES (3072)
+#define MI_RPC_BO_END_OFFSET_BYTES (MI_RPC_BO_SIZE / 2)
+#define MI_FREQ_END_OFFSET_BYTES (3076)
#define MAP_READ (1 << 0)
#define MAP_WRITE (1 << 1)
@@ -235,6 +219,11 @@ struct gen_perf_query_object
bool results_accumulated;
/**
+ * Frequency of the GT at begin and end of the query.
+ */
+ uint64_t gt_frequency[2];
+
+ /**
* Accumulated OA results between begin and end of the query.
*/
struct gen_perf_query_result result;
@@ -253,7 +242,6 @@ struct gen_perf_query_object
struct gen_perf_context {
struct gen_perf_config *perf;
- void * mem_ctx; /* ralloc context */
void * ctx; /* driver context (eg, brw_context) */
void * bufmgr;
const struct gen_device_info *devinfo;
@@ -320,7 +308,7 @@ static bool
inc_n_users(struct gen_perf_context *perf_ctx)
{
if (perf_ctx->n_oa_users == 0 &&
- intel_ioctl(perf_ctx->oa_stream_fd, I915_PERF_IOCTL_ENABLE, 0) < 0)
+ gen_ioctl(perf_ctx->oa_stream_fd, I915_PERF_IOCTL_ENABLE, 0) < 0)
{
return false;
}
@@ -339,7 +327,7 @@ dec_n_users(struct gen_perf_context *perf_ctx)
*/
--perf_ctx->n_oa_users;
if (perf_ctx->n_oa_users == 0 &&
- intel_ioctl(perf_ctx->oa_stream_fd, I915_PERF_IOCTL_DISABLE, 0) < 0)
+ gen_ioctl(perf_ctx->oa_stream_fd, I915_PERF_IOCTL_DISABLE, 0) < 0)
{
DBG("WARNING: Error disabling gen perf stream: %m\n");
}
@@ -360,6 +348,8 @@ gen_perf_close(struct gen_perf_context *perfquery,
}
}
+#define NUM_PERF_PROPERTIES(array) (ARRAY_SIZE(array) / 2)
+
static bool
gen_perf_open(struct gen_perf_context *perf_ctx,
int metrics_set_id,
@@ -368,43 +358,31 @@ gen_perf_open(struct gen_perf_context *perf_ctx,
int drm_fd,
uint32_t ctx_id)
{
- uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
- uint32_t p = 0;
-
- /* Single context sampling */
- properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
- properties[p++] = ctx_id;
-
- /* Include OA reports in samples */
- properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
- properties[p++] = true;
+ uint64_t properties[] = {
+ /* Single context sampling */
+ DRM_I915_PERF_PROP_CTX_HANDLE, ctx_id,
- /* OA unit configuration */
- properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
- properties[p++] = metrics_set_id;
+ /* Include OA reports in samples */
+ DRM_I915_PERF_PROP_SAMPLE_OA, true,
- properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
- properties[p++] = report_format;
-
- properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
- properties[p++] = period_exponent;
-
- /* SSEU configuration */
- if (gen_perf_has_global_sseu(perf_ctx->perf)) {
- properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
- properties[p++] = to_user_pointer(&perf_ctx->perf->sseu);
- }
-
- assert(p <= ARRAY_SIZE(properties));
+ /* OA unit configuration */
+ DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
+ DRM_I915_PERF_PROP_OA_FORMAT, report_format,
+ DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
+ /* SSEU configuration */
+ DRM_I915_PERF_PROP_GLOBAL_SSEU, to_user_pointer(&perf_ctx->perf->sseu),
+ };
struct drm_i915_perf_open_param param = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
- .num_properties = p / 2,
+ .num_properties = perf_ctx->perf->i915_perf_version >= 4 ?
+ NUM_PERF_PROPERTIES(properties) :
+ NUM_PERF_PROPERTIES(properties) - 1,
.properties_ptr = (uintptr_t) properties,
};
- int fd = intel_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
+ int fd = gen_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
if (fd == -1) {
DBG("Error opening gen perf OA stream: %m\n");
return false;
@@ -445,7 +423,7 @@ get_metric_id(struct gen_perf_config *perf,
if (!gen_perf_load_metric_id(perf, query->guid,
&raw_query->oa_metrics_set_id)) {
DBG("Unable to read query guid=%s ID, falling back to test config\n", query->guid);
- raw_query->oa_metrics_set_id = perf->fallback_raw_oa_metric;
+ raw_query->oa_metrics_set_id = 1ULL;
} else {
DBG("Raw query '%s'guid=%s loaded ID: %"PRIu64"\n",
query->name, query->guid, query->oa_metrics_set_id);
@@ -570,7 +548,6 @@ gen_perf_config(struct gen_perf_context *ctx)
void
gen_perf_init_context(struct gen_perf_context *perf_ctx,
struct gen_perf_config *perf_cfg,
- void * mem_ctx, /* ralloc context */
void * ctx, /* driver context (eg, brw_context) */
void * bufmgr, /* eg brw_bufmgr */
const struct gen_device_info *devinfo,
@@ -578,7 +555,6 @@ gen_perf_init_context(struct gen_perf_context *perf_ctx,
int drm_fd)
{
perf_ctx->perf = perf_cfg;
- perf_ctx->mem_ctx = mem_ctx;
perf_ctx->ctx = ctx;
perf_ctx->bufmgr = bufmgr;
perf_ctx->drm_fd = drm_fd;
@@ -586,7 +562,7 @@ gen_perf_init_context(struct gen_perf_context *perf_ctx,
perf_ctx->devinfo = devinfo;
perf_ctx->unaccumulated =
- ralloc_array(mem_ctx, struct gen_perf_query_object *, 2);
+ ralloc_array(ctx, struct gen_perf_query_object *, 2);
perf_ctx->unaccumulated_elements = 0;
perf_ctx->unaccumulated_array_size = 2;
@@ -621,7 +597,7 @@ add_to_unaccumulated_query_list(struct gen_perf_context *perf_ctx,
{
perf_ctx->unaccumulated_array_size *= 1.5;
perf_ctx->unaccumulated =
- reralloc(perf_ctx->mem_ctx, perf_ctx->unaccumulated,
+ reralloc(perf_ctx->ctx, perf_ctx->unaccumulated,
struct gen_perf_query_object *,
perf_ctx->unaccumulated_array_size);
}
@@ -649,42 +625,22 @@ snapshot_statistics_registers(struct gen_perf_context *ctx,
perf->vtbl.store_register_mem(ctx->ctx, obj->pipeline_stats.bo,
counter->pipeline_stat.reg, 8,
- offset_in_bytes + counter->offset);
+ offset_in_bytes + i * sizeof(uint64_t));
}
}
static void
-snapshot_query_layout(struct gen_perf_context *perf_ctx,
- struct gen_perf_query_object *query,
- bool end_snapshot)
+snapshot_freq_register(struct gen_perf_context *ctx,
+ struct gen_perf_query_object *query,
+ uint32_t bo_offset)
{
- struct gen_perf_config *perf_cfg = perf_ctx->perf;
- const struct gen_perf_query_field_layout *layout = &perf_cfg->query_layout;
- uint32_t offset = end_snapshot ? align(layout->size, layout->alignment) : 0;
-
- for (uint32_t f = 0; f < layout->n_fields; f++) {
- const struct gen_perf_query_field *field =
- &layout->fields[end_snapshot ? f : (layout->n_fields - 1 - f)];
+ struct gen_perf_config *perf = ctx->perf;
+ const struct gen_device_info *devinfo = ctx->devinfo;
- switch (field->type) {
- case GEN_PERF_QUERY_FIELD_TYPE_MI_RPC:
- perf_cfg->vtbl.emit_mi_report_perf_count(perf_ctx->ctx, query->oa.bo,
- offset + field->location,
- query->oa.begin_report_id +
- (end_snapshot ? 1 : 0));
- break;
- case GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
- case GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
- case GEN_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
- case GEN_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
- perf_cfg->vtbl.store_register_mem(perf_ctx->ctx, query->oa.bo,
- field->mmio_offset, field->size,
- offset + field->location);
- break;
- default:
- unreachable("Invalid field type");
- }
- }
+ if (devinfo->gen == 8 && !devinfo->is_cherryview)
+ perf->vtbl.store_register_mem(ctx->ctx, query->oa.bo, GEN7_RPSTAT1, 4, bo_offset);
+ else if (devinfo->gen >= 9)
+ perf->vtbl.store_register_mem(ctx->ctx, query->oa.bo, GEN9_RPSTAT0, 4, bo_offset);
}
bool
@@ -764,8 +720,8 @@ gen_perf_begin_query(struct gen_perf_context *perf_ctx,
/* The period_exponent gives a sampling period as follows:
* sample_period = timestamp_period * 2^(period_exponent + 1)
*
- * The timestamps increments every 80ns (HSW), ~52ns (GFX9LP) or
- * ~83ns (GFX8/9).
+ * The timestamps increments every 80ns (HSW), ~52ns (GEN9LP) or
+ * ~83ns (GEN8/9).
*
* The counter overflow period is derived from the EuActive counter
* which reads a counter that increments by the number of clock
@@ -781,7 +737,7 @@ gen_perf_begin_query(struct gen_perf_context *perf_ctx,
*/
int a_counter_in_bits = 32;
- if (devinfo->ver >= 8)
+ if (devinfo->gen >= 8)
a_counter_in_bits = 40;
uint64_t overflow_period = pow(2, a_counter_in_bits) / (perf_cfg->sys_vars.n_eus *
@@ -845,7 +801,10 @@ gen_perf_begin_query(struct gen_perf_context *perf_ctx,
query->oa.begin_report_id = perf_ctx->next_query_start_report_id;
perf_ctx->next_query_start_report_id += 2;
- snapshot_query_layout(perf_ctx, query, false /* end_snapshot */);
+ /* Take a starting OA counter snapshot. */
+ perf_cfg->vtbl.emit_mi_report_perf_count(perf_ctx->ctx, query->oa.bo, 0,
+ query->oa.begin_report_id);
+ snapshot_freq_register(perf_ctx, query, MI_FREQ_START_OFFSET_BYTES);
++perf_ctx->n_active_oa_queries;
@@ -921,8 +880,13 @@ gen_perf_end_query(struct gen_perf_context *perf_ctx,
* from perf. In this case we mustn't try and emit a closing
* MI_RPC command in case the OA unit has already been disabled
*/
- if (!query->oa.results_accumulated)
- snapshot_query_layout(perf_ctx, query, true /* end_snapshot */);
+ if (!query->oa.results_accumulated) {
+ /* Take an ending OA counter snapshot. */
+ snapshot_freq_register(perf_ctx, query, MI_FREQ_END_OFFSET_BYTES);
+ perf_cfg->vtbl.emit_mi_report_perf_count(perf_ctx->ctx, query->oa.bo,
+ MI_RPC_BO_END_OFFSET_BYTES,
+ query->oa.begin_report_id + 1);
+ }
--perf_ctx->n_active_oa_queries;
@@ -974,24 +938,20 @@ read_oa_samples_until(struct gen_perf_context *perf_ctx,
if (len <= 0) {
exec_list_push_tail(&perf_ctx->free_sample_buffers, &buf->link);
- if (len == 0) {
+ if (len < 0) {
+ if (errno == EAGAIN) {
+ return ((last_timestamp - start_timestamp) < INT32_MAX &&
+ (last_timestamp - start_timestamp) >=
+ (end_timestamp - start_timestamp)) ?
+ OA_READ_STATUS_FINISHED :
+ OA_READ_STATUS_UNFINISHED;
+ } else {
+ DBG("Error reading i915 perf samples: %m\n");
+ }
+ } else
DBG("Spurious EOF reading i915 perf samples\n");
- return OA_READ_STATUS_ERROR;
- }
-
- if (errno != EAGAIN) {
- DBG("Error reading i915 perf samples: %m\n");
- return OA_READ_STATUS_ERROR;
- }
-
- if ((last_timestamp - start_timestamp) >= INT32_MAX)
- return OA_READ_STATUS_UNFINISHED;
-
- if ((last_timestamp - start_timestamp) <
- (end_timestamp - start_timestamp))
- return OA_READ_STATUS_UNFINISHED;
- return OA_READ_STATUS_FINISHED;
+ return OA_READ_STATUS_ERROR;
}
buf->len = len;
@@ -1041,8 +1001,8 @@ read_oa_samples_for_query(struct gen_perf_context *perf_ctx,
if (query->oa.map == NULL)
query->oa.map = perf_cfg->vtbl.bo_map(perf_ctx->ctx, query->oa.bo, MAP_READ);
- start = last = query->oa.map + field_offset(false, 0);
- end = query->oa.map + field_offset(true, 0);
+ start = last = query->oa.map;
+ end = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES;
if (start[0] != query->oa.begin_report_id) {
DBG("Spurious start report id=%"PRIu32"\n", start[0]);
@@ -1056,7 +1016,8 @@ read_oa_samples_for_query(struct gen_perf_context *perf_ctx,
/* Read the reports until the end timestamp. */
switch (read_oa_samples_until(perf_ctx, start[1], end[1])) {
case OA_READ_STATUS_ERROR:
- FALLTHROUGH; /* Let accumulate_oa_reports() deal with the error. */
+ /* Fallthrough and let accumulate_oa_reports() deal with the
+ * error. */
case OA_READ_STATUS_FINISHED:
return true;
case OA_READ_STATUS_UNFINISHED:
@@ -1194,8 +1155,8 @@ static bool
oa_report_ctx_id_valid(const struct gen_device_info *devinfo,
const uint32_t *report)
{
- assert(devinfo->ver >= 8);
- if (devinfo->ver == 8)
+ assert(devinfo->gen >= 8);
+ if (devinfo->gen == 8)
return (report[0] & (1 << 25)) != 0;
return (report[0] & (1 << 16)) != 0;
}
@@ -1213,7 +1174,7 @@ oa_report_ctx_id_valid(const struct gen_device_info *devinfo,
*
* These periodic snapshots help to ensure we handle counter overflow
* correctly by being frequent enough to ensure we don't miss multiple
- * overflows of a counter between snapshots. For Gfx8+ the i915 perf
+ * overflows of a counter between snapshots. For Gen8+ the i915 perf
* snapshots provide the extra context-switch reports that let us
* subtract out the progress of counters associated with other
* contexts running on the system.
@@ -1232,8 +1193,8 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
assert(query->oa.map != NULL);
- start = last = query->oa.map + field_offset(false, 0);
- end = query->oa.map + field_offset(true, 0);
+ start = last = query->oa.map;
+ end = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES;
if (start[0] != query->oa.begin_report_id) {
DBG("Spurious start report id=%"PRIu32"\n", start[0]);
@@ -1244,10 +1205,10 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
goto error;
}
- /* On Gfx12+ OA reports are sourced from per context counters, so we don't
+ /* On Gen12+ OA reports are sourced from per context counters, so we don't
* ever have to look at the global OA buffer. Yey \o/
*/
- if (perf_ctx->devinfo->ver >= 12) {
+ if (perf_ctx->devinfo->gen >= 12) {
last = start;
goto end;
}
@@ -1300,7 +1261,7 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
goto end;
}
- /* For Gfx8+ since the counters continue while other
+ /* For Gen8+ since the counters continue while other
* contexts are running we need to discount any unrelated
* deltas. The hardware automatically generates a report
* on context switch which gives us a new reference point
@@ -1309,7 +1270,7 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
* For Haswell we can rely on the HW to stop the progress
* of OA counters while any other context is active.
*/
- if (devinfo->ver >= 8) {
+ if (devinfo->gen >= 8) {
/* Consider that the current report matches our context only if
* the report says the report ID is valid.
*/
@@ -1339,7 +1300,6 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
if (add) {
gen_perf_query_result_accumulate(&query->oa.result,
query->queryinfo,
- devinfo,
last, report);
} else {
/* We're not adding the delta because we've identified it's not
@@ -1368,7 +1328,7 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
end:
gen_perf_query_result_accumulate(&query->oa.result, query->queryinfo,
- devinfo, last, end);
+ last, end);
query->oa.results_accumulated = true;
drop_from_unaccumulated_query_list(perf_ctx, query);
@@ -1431,6 +1391,38 @@ gen_perf_delete_query(struct gen_perf_context *perf_ctx,
free(query);
}
+#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)
+
+static void
+read_gt_frequency(struct gen_perf_context *perf_ctx,
+ struct gen_perf_query_object *obj)
+{
+ const struct gen_device_info *devinfo = perf_ctx->devinfo;
+ uint32_t start = *((uint32_t *)(obj->oa.map + MI_FREQ_START_OFFSET_BYTES)),
+ end = *((uint32_t *)(obj->oa.map + MI_FREQ_END_OFFSET_BYTES));
+
+ switch (devinfo->gen) {
+ case 7:
+ case 8:
+ obj->oa.gt_frequency[0] = GET_FIELD(start, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
+ obj->oa.gt_frequency[1] = GET_FIELD(end, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
+ break;
+ case 9:
+ case 10:
+ case 11:
+ case 12:
+ obj->oa.gt_frequency[0] = GET_FIELD(start, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
+ obj->oa.gt_frequency[1] = GET_FIELD(end, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
+ break;
+ default:
+ unreachable("unexpected gen");
+ }
+
+ /* Put the numbers into Hz. */
+ obj->oa.gt_frequency[0] *= 1000000ULL;
+ obj->oa.gt_frequency[1] *= 1000000ULL;
+}
+
static int
get_oa_counter_data(struct gen_perf_context *perf_ctx,
struct gen_perf_query_object *query,
@@ -1454,21 +1446,19 @@ get_oa_counter_data(struct gen_perf_context *perf_ctx,
out_uint64 = (uint64_t *)(data + counter->offset);
*out_uint64 =
counter->oa_counter_read_uint64(perf_cfg, queryinfo,
- &query->oa.result);
+ query->oa.result.accumulator);
break;
case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
out_float = (float *)(data + counter->offset);
*out_float =
counter->oa_counter_read_float(perf_cfg, queryinfo,
- &query->oa.result);
+ query->oa.result.accumulator);
break;
default:
/* So far we aren't using uint32, double or bool32... */
unreachable("unexpected counter data type");
}
-
- if (counter->offset + counter_size > written)
- written = counter->offset + counter_size;
+ written = counter->offset + counter_size;
}
}
@@ -1535,14 +1525,13 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
while (!read_oa_samples_for_query(perf_ctx, query, current_batch))
;
+ read_gt_frequency(perf_ctx, query);
uint32_t *begin_report = query->oa.map;
- uint32_t *end_report = query->oa.map + perf_cfg->query_layout.size;
- gen_perf_query_result_accumulate_fields(&query->oa.result,
- query->queryinfo,
- perf_ctx->devinfo,
- begin_report,
- end_report,
- true /* no_oa_accumulate */);
+ uint32_t *end_report = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES;
+ gen_perf_query_result_read_frequencies(&query->oa.result,
+ perf_ctx->devinfo,
+ begin_report,
+ end_report);
accumulate_oa_reports(perf_ctx, query);
assert(query->oa.results_accumulated);
@@ -1555,8 +1544,9 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
const struct gen_device_info *devinfo = perf_ctx->devinfo;
written = gen_perf_query_result_write_mdapi((uint8_t *)data, data_size,
- devinfo, query->queryinfo,
- &query->oa.result);
+ devinfo, &query->oa.result,
+ query->oa.gt_frequency[0],
+ query->oa.gt_frequency[1]);
}
break;
diff --git a/lib/mesa/src/intel/perf/gen_perf_query.h b/lib/mesa/src/intel/perf/gen_perf_query.h
index b029e01d0..d064a5d06 100644
--- a/lib/mesa/src/intel/perf/gen_perf_query.h
+++ b/lib/mesa/src/intel/perf/gen_perf_query.h
@@ -36,7 +36,6 @@ struct gen_perf_context *gen_perf_new_context(void *parent);
void gen_perf_init_context(struct gen_perf_context *perf_ctx,
struct gen_perf_config *perf_cfg,
- void * mem_ctx, /* ralloc context */
void * ctx, /* driver context (eg, brw_context) */
void * bufmgr, /* eg brw_bufmgr */
const struct gen_device_info *devinfo,
@@ -45,6 +44,15 @@ void gen_perf_init_context(struct gen_perf_context *perf_ctx,
const struct gen_perf_query_info* gen_perf_query_info(const struct gen_perf_query_object *);
+
+void gen_perf_init_context(struct gen_perf_context *perf_ctx,
+ struct gen_perf_config *perf_cfg,
+ void * ctx, /* driver context (eg, brw_context) */
+ void * bufmgr, /* eg brw_bufmgr */
+ const struct gen_device_info *devinfo,
+ uint32_t hw_ctx,
+ int drm_fd);
+
struct gen_perf_config *gen_perf_config(struct gen_perf_context *ctx);
int gen_perf_active_queries(struct gen_perf_context *perf_ctx,
diff --git a/lib/mesa/src/intel/perf/gen_perf_regs.h b/lib/mesa/src/intel/perf/gen_perf_regs.h
index 67e7ece41..1b54fe29d 100644
--- a/lib/mesa/src/intel/perf/gen_perf_regs.h
+++ b/lib/mesa/src/intel/perf/gen_perf_regs.h
@@ -27,50 +27,17 @@
#define INTEL_MASK(high, low) (((1u<<((high)-(low)+1))-1)<<(low))
/* GT core frequency counters */
-#define GFX7_RPSTAT1 0xA01C
-#define GFX7_RPSTAT1_CURR_GT_FREQ_SHIFT 7
-#define GFX7_RPSTAT1_CURR_GT_FREQ_MASK INTEL_MASK(13, 7)
-#define GFX7_RPSTAT1_PREV_GT_FREQ_SHIFT 0
-#define GFX7_RPSTAT1_PREV_GT_FREQ_MASK INTEL_MASK(6, 0)
-
-#define GFX9_RPSTAT0 0xA01C
-#define GFX9_RPSTAT0_CURR_GT_FREQ_SHIFT 23
-#define GFX9_RPSTAT0_CURR_GT_FREQ_MASK INTEL_MASK(31, 23)
-#define GFX9_RPSTAT0_PREV_GT_FREQ_SHIFT 0
-#define GFX9_RPSTAT0_PREV_GT_FREQ_MASK INTEL_MASK(8, 0)
-
-/* Programmable perf 64bits counters (used for GTRequestQueueFull counter on
- * gfx7-11)
- */
-#define PERF_CNT_1_DW0 0x91b8
-#define PERF_CNT_2_DW0 0x91c0
-#define PERF_CNT_VALUE_MASK ((1ull << 44) - 1)
-
-/* Global OA perf counters */
-#define GFX7_N_OA_PERF_A32 44
-#define GFX7_OA_PERF_A32(idx) (0x2800 + (idx) * 4)
-
-#define GFX8_OA_PERF_TICKS 0x2910
-#define GFX8_N_OA_PERF_A64 32
-#define GFX8_N_OA_PERF_A32 4
-#define GFX8_N_OA_PERF_B32 8
-#define GFX8_N_OA_PERF_C32 8
-#define GFX8_OA_PERF_A64_LDW(idx) (0x2800 + (idx) * 8)
-#define GFX8_OA_PERF_A64_UDW(idx) (0x2800 + (idx) * 8 + 4)
-#define GFX8_OA_PERF_A32(idx) (0x2900 + (idx) * 4)
-#define GFX8_OA_PERF_B32(idx) (0x2920 + (idx) * 4)
-#define GFX8_OA_PERF_C32(idx) (0x2940 + (idx) * 4)
-
-#define GFX12_OAG_PERF_TICKS 0xda90
-#define GFX12_N_OAG_PERF_A64 32
-#define GFX12_N_OAG_PERF_A32 4
-#define GFX12_N_OAG_PERF_B32 8
-#define GFX12_N_OAG_PERF_C32 8
-#define GFX12_OAG_PERF_A64_LDW(idx) (0xd980 + (idx) * 8)
-#define GFX12_OAG_PERF_A64_UDW(idx) (0xd980 + (idx) * 8 + 4)
-#define GFX12_OAG_PERF_A32(idx) (0xda80 + (idx) * 4)
-#define GFX12_OAG_PERF_B32(idx) (0xda94 + (idx) * 4)
-#define GFX12_OAG_PERF_C32(idx) (0xdab4 + (idx) * 4)
+#define GEN7_RPSTAT1 0xA01C
+#define GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT 7
+#define GEN7_RPSTAT1_CURR_GT_FREQ_MASK INTEL_MASK(13, 7)
+#define GEN7_RPSTAT1_PREV_GT_FREQ_SHIFT 0
+#define GEN7_RPSTAT1_PREV_GT_FREQ_MASK INTEL_MASK(6, 0)
+
+#define GEN9_RPSTAT0 0xA01C
+#define GEN9_RPSTAT0_CURR_GT_FREQ_SHIFT 23
+#define GEN9_RPSTAT0_CURR_GT_FREQ_MASK INTEL_MASK(31, 23)
+#define GEN9_RPSTAT0_PREV_GT_FREQ_SHIFT 0
+#define GEN9_RPSTAT0_PREV_GT_FREQ_MASK INTEL_MASK(8, 0)
/* Pipeline statistic counters */
#define IA_VERTICES_COUNT 0x2310
@@ -87,9 +54,9 @@
#define PS_DEPTH_COUNT 0x2350
/* Stream-out counters */
-#define GFX6_SO_PRIM_STORAGE_NEEDED 0x2280
-#define GFX7_SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8)
-#define GFX6_SO_NUM_PRIMS_WRITTEN 0x2288
-#define GFX7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
+#define GEN6_SO_PRIM_STORAGE_NEEDED 0x2280
+#define GEN7_SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8)
+#define GEN6_SO_NUM_PRIMS_WRITTEN 0x2288
+#define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
#endif /* GEN_PERF_REGS_H */