diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2020-08-26 05:29:31 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2020-08-26 05:29:31 +0000 |
commit | b588b4f3eff82e42345c0b15670ab089b53f9cd6 (patch) | |
tree | 49350a8ad21d0a8b6f6b5313a33a3080ae81d821 /lib/mesa/src | |
parent | 2ebab484cac65c01dd19e8c1b62eb58c83074390 (diff) |
Import Mesa 20.1.6
Diffstat (limited to 'lib/mesa/src')
-rw-r--r-- | lib/mesa/src/amd/Android.compiler.mk | 4 | ||||
-rw-r--r-- | lib/mesa/src/freedreno/vulkan/tu_wsi_x11.c | 4 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/panfrost/Android.mk | 3 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/panfrost/Makefile.sources | 18 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/zink/zink_draw.c | 678 | ||||
-rw-r--r-- | lib/mesa/src/intel/compiler/brw_gen_enum.h | 53 | ||||
-rw-r--r-- | lib/mesa/src/intel/perf/gen_perf_private.h | 9 | ||||
-rw-r--r-- | lib/mesa/src/intel/perf/gen_perf_query.c | 264 | ||||
-rw-r--r-- | lib/mesa/src/intel/perf/gen_perf_query.h | 10 | ||||
-rw-r--r-- | lib/mesa/src/intel/perf/gen_perf_regs.h | 63 |
10 files changed, 367 insertions, 739 deletions
diff --git a/lib/mesa/src/amd/Android.compiler.mk b/lib/mesa/src/amd/Android.compiler.mk index 7d4a1ea43..3f544ac3c 100644 --- a/lib/mesa/src/amd/Android.compiler.mk +++ b/lib/mesa/src/amd/Android.compiler.mk @@ -32,8 +32,10 @@ include $(CLEAR_VARS) LOCAL_MODULE := libmesa_aco +# filter-out compiler/aco_instruction_selection_setup.cpp because +# it's already included by compiler/aco_instruction_selection.cpp LOCAL_SRC_FILES := \ - $(ACO_FILES) + $(filter-out compiler/aco_instruction_selection_setup.cpp, $(ACO_FILES)) LOCAL_CFLAGS += -DFORCE_BUILD_AMDGPU # instructs LLVM to declare LLVMInitializeAMDGPU* functions diff --git a/lib/mesa/src/freedreno/vulkan/tu_wsi_x11.c b/lib/mesa/src/freedreno/vulkan/tu_wsi_x11.c index e6ce75e7f..180e504a0 100644 --- a/lib/mesa/src/freedreno/vulkan/tu_wsi_x11.c +++ b/lib/mesa/src/freedreno/vulkan/tu_wsi_x11.c @@ -75,7 +75,7 @@ VkResult tu_CreateXcbSurfaceKHR( if (pAllocator) alloc = pAllocator; else - alloc = &instance->vk.alloc; + alloc = &instance->alloc; return wsi_create_xcb_surface(alloc, pCreateInfo, pSurface); } @@ -94,7 +94,7 @@ VkResult tu_CreateXlibSurfaceKHR( if (pAllocator) alloc = pAllocator; else - alloc = &instance->vk.alloc; + alloc = &instance->alloc; return wsi_create_xlib_surface(alloc, pCreateInfo, pSurface); } diff --git a/lib/mesa/src/gallium/drivers/panfrost/Android.mk b/lib/mesa/src/gallium/drivers/panfrost/Android.mk index 48c4d52c7..c7ad6e175 100644 --- a/lib/mesa/src/gallium/drivers/panfrost/Android.mk +++ b/lib/mesa/src/gallium/drivers/panfrost/Android.mk @@ -42,7 +42,8 @@ LOCAL_STATIC_LIBRARIES := \ libmesa_nir \ libmesa_winsys_panfrost \ libpanfrost_bifrost \ - libpanfrost_lib \ + libpanfrost_decode \ + libpanfrost_encoder \ libpanfrost_midgard \ libpanfrost_shared \ libpanfrost_util \ diff --git a/lib/mesa/src/gallium/drivers/panfrost/Makefile.sources b/lib/mesa/src/gallium/drivers/panfrost/Makefile.sources index 470dfb31e..c734cd080 100644 --- a/lib/mesa/src/gallium/drivers/panfrost/Makefile.sources +++ b/lib/mesa/src/gallium/drivers/panfrost/Makefile.sources @@ -1,17 +1,31 @@ C_SOURCES := \ + nir/nir_lower_blend.c \ + nir/nir_lower_blend.h \ + nir/nir_lower_framebuffer.c \ + \ + pan_allocate.c \ + pan_allocate.h \ pan_assemble.c \ pan_blend_cso.c \ - pan_blend_cso.h \ + pan_blend.h \ + pan_blending.c \ + pan_blending.h \ + pan_blend_shaders.c \ + pan_blend_shaders.h \ pan_blit.c \ pan_cmdstream.c \ pan_cmdstream.h \ pan_compute.c \ pan_context.c \ pan_context.h \ + pan_fragment.c \ pan_job.c \ pan_job.h \ + pan_mfbd.c \ pan_public.h \ pan_resource.c \ pan_resource.h \ + pan_scoreboard.c \ pan_screen.c \ - pan_screen.h + pan_screen.h \ + pan_sfbd.c \ diff --git a/lib/mesa/src/gallium/drivers/zink/zink_draw.c b/lib/mesa/src/gallium/drivers/zink/zink_draw.c index 8fc31b13d..553579acf 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_draw.c +++ b/lib/mesa/src/gallium/drivers/zink/zink_draw.c @@ -1,141 +1,39 @@ #include "zink_compiler.h" #include "zink_context.h" #include "zink_program.h" -#include "zink_query.h" #include "zink_resource.h" #include "zink_screen.h" #include "zink_state.h" -#include "zink_surface.h" #include "indices/u_primconvert.h" -#include "tgsi/tgsi_from_mesa.h" #include "util/hash_table.h" #include "util/u_debug.h" #include "util/u_helpers.h" #include "util/u_inlines.h" #include "util/u_prim.h" -#include "util/u_prim_restart.h" - -static void -zink_emit_xfb_counter_barrier(struct zink_context *ctx) -{ - /* Between the pause and resume there needs to be a memory barrier for the counter buffers - * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT - * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT - * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT - * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT. - * - * - from VK_EXT_transform_feedback spec - */ - for (unsigned i = 0; i < ctx->num_so_targets; i++) { - struct zink_so_target *t = zink_so_target(ctx->so_targets[i]); - if (!t) - continue; - struct zink_resource *res = zink_resource(t->counter_buffer); - if (t->counter_buffer_valid) - zink_resource_buffer_barrier(ctx, NULL, res, VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT, - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT); - else - zink_resource_buffer_barrier(ctx, NULL, res, VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT, - VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT); - } - ctx->xfb_barrier = false; -} - -static void -zink_emit_xfb_vertex_input_barrier(struct zink_context *ctx, struct zink_resource *res) +static VkDescriptorSet +allocate_descriptor_set(struct zink_screen *screen, + struct zink_batch *batch, + struct zink_gfx_program *prog) { - /* A pipeline barrier is required between using the buffers as - * transform feedback buffers and vertex buffers to - * ensure all writes to the transform feedback buffers are visible - * when the data is read as vertex attributes. - * The source access is VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT - * and the destination access is VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT - * for the pipeline stages VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT - * and VK_PIPELINE_STAGE_VERTEX_INPUT_BIT respectively. - * - * - 20.3.1. Drawing Transform Feedback - */ - zink_resource_buffer_barrier(ctx, NULL, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT); -} - -static void -zink_emit_stream_output_targets(struct pipe_context *pctx) -{ - struct zink_context *ctx = zink_context(pctx); - struct zink_screen *screen = zink_screen(pctx->screen); - struct zink_batch *batch = &ctx->batch; - VkBuffer buffers[PIPE_MAX_SO_OUTPUTS] = {}; - VkDeviceSize buffer_offsets[PIPE_MAX_SO_OUTPUTS] = {}; - VkDeviceSize buffer_sizes[PIPE_MAX_SO_OUTPUTS] = {}; - - for (unsigned i = 0; i < ctx->num_so_targets; i++) { - struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i]; - if (!t) { - /* no need to reference this or anything */ - buffers[i] = zink_resource(ctx->dummy_xfb_buffer)->obj->buffer; - buffer_offsets[i] = 0; - buffer_sizes[i] = sizeof(uint8_t); - continue; - } - struct zink_resource *res = zink_resource(t->base.buffer); - if (!(res->bind_history & ZINK_RESOURCE_USAGE_STREAMOUT)) - /* resource has been rebound */ - t->counter_buffer_valid = false; - buffers[i] = res->obj->buffer; - zink_resource_buffer_barrier(ctx, NULL, zink_resource(t->base.buffer), - VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT); - zink_batch_reference_resource_rw(batch, res, true); - buffer_offsets[i] = t->base.buffer_offset; - buffer_sizes[i] = t->base.buffer_size; - res->bind_history |= ZINK_RESOURCE_USAGE_STREAMOUT; - util_range_add(t->base.buffer, &res->valid_buffer_range, t->base.buffer_offset, - t->base.buffer_offset + t->base.buffer_size); + assert(batch->descs_left >= prog->num_descriptors); + VkDescriptorSetAllocateInfo dsai; + memset((void *)&dsai, 0, sizeof(dsai)); + dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + dsai.pNext = NULL; + dsai.descriptorPool = batch->descpool; + dsai.descriptorSetCount = 1; + dsai.pSetLayouts = &prog->dsl; + + VkDescriptorSet desc_set; + if (vkAllocateDescriptorSets(screen->dev, &dsai, &desc_set) != VK_SUCCESS) { + debug_printf("ZINK: failed to allocate descriptor set :/"); + return VK_NULL_HANDLE; } - screen->vk_CmdBindTransformFeedbackBuffersEXT(batch->state->cmdbuf, 0, ctx->num_so_targets, - buffers, buffer_offsets, - buffer_sizes); - ctx->dirty_so_targets = false; -} - -static void -barrier_vertex_buffers(struct zink_context *ctx) -{ - const struct zink_vertex_elements_state *elems = ctx->element_state; - for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) { - struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ctx->element_state->binding_map[i]; - assert(vb); - if (vb->buffer.resource) { - struct zink_resource *res = zink_resource(vb->buffer.resource); - zink_resource_buffer_barrier(ctx, NULL, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT); - } - } -} - -static void -check_buffer_barrier(struct zink_context *ctx, struct pipe_resource *pres, VkAccessFlags flags, VkPipelineStageFlags pipeline) -{ - struct zink_resource *res = zink_resource(pres); - zink_resource_buffer_barrier(ctx, NULL, res, flags, pipeline); -} - -static void -barrier_draw_buffers(struct zink_context *ctx, const struct pipe_draw_info *dinfo, - const struct pipe_draw_indirect_info *dindirect, struct pipe_resource *index_buffer) -{ - if (index_buffer) - check_buffer_barrier(ctx, index_buffer, VK_ACCESS_INDEX_READ_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT); - if (dindirect && dindirect->buffer) { - check_buffer_barrier(ctx, dindirect->buffer, - VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT); - if (dindirect->indirect_draw_count) - check_buffer_barrier(ctx, dindirect->indirect_draw_count, - VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT); - } + batch->descs_left -= prog->num_descriptors; + return desc_set; } static void @@ -143,100 +41,43 @@ zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx) { VkBuffer buffers[PIPE_MAX_ATTRIBS]; VkDeviceSize buffer_offsets[PIPE_MAX_ATTRIBS]; - VkDeviceSize buffer_strides[PIPE_MAX_ATTRIBS]; const struct zink_vertex_elements_state *elems = ctx->element_state; - struct zink_screen *screen = zink_screen(ctx->base.screen); - - if (!elems->hw_state.num_bindings) - return; - for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) { - struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ctx->element_state->binding_map[i]; + struct pipe_vertex_buffer *vb = ctx->buffers + ctx->element_state->binding_map[i]; assert(vb); if (vb->buffer.resource) { struct zink_resource *res = zink_resource(vb->buffer.resource); - buffers[i] = res->obj->buffer; + buffers[i] = res->buffer; buffer_offsets[i] = vb->buffer_offset; - buffer_strides[i] = vb->stride; - zink_batch_reference_resource_rw(batch, res, false); + zink_batch_reference_resoure(batch, res); } else { - buffers[i] = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer; + buffers[i] = zink_resource(ctx->dummy_buffer)->buffer; buffer_offsets[i] = 0; - buffer_strides[i] = 0; } } - if (screen->info.have_EXT_extended_dynamic_state) - screen->vk_CmdBindVertexBuffers2EXT(batch->state->cmdbuf, 0, - elems->hw_state.num_bindings, - buffers, buffer_offsets, NULL, buffer_strides); - else - vkCmdBindVertexBuffers(batch->state->cmdbuf, 0, + if (elems->hw_state.num_bindings > 0) + vkCmdBindVertexBuffers(batch->cmdbuf, 0, elems->hw_state.num_bindings, buffers, buffer_offsets); } -static struct zink_compute_program * -get_compute_program(struct zink_context *ctx) -{ - unsigned bits = 1 << PIPE_SHADER_COMPUTE; - ctx->dirty_shader_stages |= ctx->inlinable_uniforms_dirty_mask & - ctx->inlinable_uniforms_valid_mask & - ctx->shader_has_inlinable_uniforms_mask & bits; - if (ctx->dirty_shader_stages & bits) { - struct hash_entry *entry = _mesa_hash_table_search(ctx->compute_program_cache, - &ctx->compute_stage->shader_id); - if (!entry) { - struct zink_compute_program *comp; - comp = zink_create_compute_program(ctx, ctx->compute_stage); - entry = _mesa_hash_table_insert(ctx->compute_program_cache, &comp->shader->shader_id, comp); - if (!entry) - return NULL; - } - if (entry->data != ctx->curr_compute) - ctx->compute_pipeline_state.dirty = true; - ctx->curr_compute = entry->data; - ctx->dirty_shader_stages &= bits; - ctx->inlinable_uniforms_dirty_mask &= bits; - } - - assert(ctx->curr_compute); - return ctx->curr_compute; -} - static struct zink_gfx_program * get_gfx_program(struct zink_context *ctx) { - if (ctx->last_vertex_stage_dirty) { - if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY]) - ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_GEOMETRY); - else if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL]) - ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_TESS_EVAL); - else - ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_VERTEX); - ctx->last_vertex_stage_dirty = false; - } - unsigned bits = u_bit_consecutive(PIPE_SHADER_VERTEX, 5); - ctx->dirty_shader_stages |= ctx->inlinable_uniforms_dirty_mask & - ctx->inlinable_uniforms_valid_mask & - ctx->shader_has_inlinable_uniforms_mask & bits; - if (ctx->dirty_shader_stages & bits) { + if (ctx->dirty_program) { struct hash_entry *entry = _mesa_hash_table_search(ctx->program_cache, ctx->gfx_stages); - if (entry) - zink_update_gfx_program(ctx, entry->data); - else { + if (!entry) { struct zink_gfx_program *prog; - prog = zink_create_gfx_program(ctx, ctx->gfx_stages); - entry = _mesa_hash_table_insert(ctx->program_cache, prog->shaders, prog); + prog = zink_create_gfx_program(zink_screen(ctx->base.screen), + ctx->gfx_stages); + entry = _mesa_hash_table_insert(ctx->program_cache, prog->stages, prog); if (!entry) return NULL; } - if (ctx->curr_program != entry->data) - ctx->gfx_pipeline_state.combined_dirty = true; ctx->curr_program = entry->data; - ctx->dirty_shader_stages &= ~bits; - ctx->inlinable_uniforms_dirty_mask &= ~bits; + ctx->dirty_program = false; } assert(ctx->curr_program); @@ -262,92 +103,32 @@ line_width_needed(enum pipe_prim_type reduced_prim, } } -static inline bool -restart_supported(enum pipe_prim_type mode) -{ - return mode == PIPE_PRIM_LINE_STRIP || mode == PIPE_PRIM_TRIANGLE_STRIP || mode == PIPE_PRIM_TRIANGLE_FAN; -} - -static void -update_drawid(struct zink_context *ctx, unsigned draw_id) -{ - struct zink_batch *batch = &ctx->batch; - if (ctx->drawid_broken) { - vkCmdPushConstants(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_VERTEX_BIT, - offsetof(struct zink_gfx_push_constant, draw_id), sizeof(unsigned), - &draw_id); - } -} - void zink_draw_vbo(struct pipe_context *pctx, - const struct pipe_draw_info *dinfo, - const struct pipe_draw_indirect_info *dindirect, - const struct pipe_draw_start_count *draws, - unsigned num_draws) + const struct pipe_draw_info *dinfo) { - if (!dindirect && (!draws[0].count || !dinfo->instance_count)) - return; - struct zink_context *ctx = zink_context(pctx); struct zink_screen *screen = zink_screen(pctx->screen); struct zink_rasterizer_state *rast_state = ctx->rast_state; - struct zink_depth_stencil_alpha_state *dsa_state = ctx->dsa_state; - struct zink_so_target *so_target = - dindirect && dindirect->count_from_stream_output ? - zink_so_target(dindirect->count_from_stream_output) : NULL; - VkBuffer counter_buffers[PIPE_MAX_SO_OUTPUTS]; - VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_OUTPUTS]; - bool need_index_buffer_unref = false; - - /* check memory usage and flush/stall as needed to avoid oom */ - zink_maybe_flush_or_stall(ctx); - - if (dinfo->primitive_restart && !restart_supported(dinfo->mode)) { - util_draw_vbo_without_prim_restart(pctx, dinfo, dindirect, &draws[0]); - return; - } - if (dinfo->mode == PIPE_PRIM_QUADS || - dinfo->mode == PIPE_PRIM_QUAD_STRIP || - dinfo->mode == PIPE_PRIM_POLYGON || - (dinfo->mode == PIPE_PRIM_TRIANGLE_FAN && !screen->have_triangle_fans) || - dinfo->mode == PIPE_PRIM_LINE_LOOP) { + + if (dinfo->mode >= PIPE_PRIM_QUADS || + dinfo->mode == PIPE_PRIM_LINE_LOOP || + dinfo->index_size == 1) { + if (!u_trim_pipe_prim(dinfo->mode, (unsigned *)&dinfo->count)) + return; + util_primconvert_save_rasterizer_state(ctx->primconvert, &rast_state->base); - util_primconvert_draw_vbo(ctx->primconvert, dinfo, dindirect, draws, num_draws); + util_primconvert_draw_vbo(ctx->primconvert, dinfo); return; } - if (ctx->gfx_pipeline_state.vertices_per_patch != dinfo->vertices_per_patch) - ctx->gfx_pipeline_state.dirty = true; - bool drawid_broken = ctx->drawid_broken; - ctx->drawid_broken = BITSET_TEST(ctx->gfx_stages[PIPE_SHADER_VERTEX]->nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID) && - (!dindirect || !dindirect->buffer); - if (drawid_broken != ctx->drawid_broken) - ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_VERTEX); - ctx->gfx_pipeline_state.vertices_per_patch = dinfo->vertices_per_patch; - if (ctx->rast_state->base.point_quad_rasterization && - ctx->gfx_prim_mode != dinfo->mode) { - if (ctx->gfx_prim_mode == PIPE_PRIM_POINTS || dinfo->mode == PIPE_PRIM_POINTS) - ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_FRAGMENT); - } - ctx->gfx_prim_mode = dinfo->mode; + struct zink_gfx_program *gfx_program = get_gfx_program(ctx); if (!gfx_program) return; - if (ctx->gfx_pipeline_state.primitive_restart != !!dinfo->primitive_restart) - ctx->gfx_pipeline_state.dirty = true; - ctx->gfx_pipeline_state.primitive_restart = !!dinfo->primitive_restart; - - if (!zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state) { - for (unsigned i = 0; i < ctx->element_state->hw_state.num_bindings; i++) { - unsigned binding = ctx->element_state->binding_map[i]; - const struct pipe_vertex_buffer *vb = ctx->vertex_buffers + binding; - if (ctx->gfx_pipeline_state.bindings[i].stride != vb->stride) { - ctx->gfx_pipeline_state.bindings[i].stride = vb->stride; - ctx->gfx_pipeline_state.dirty = true; - } - } - } + VkPipeline pipeline = zink_get_gfx_pipeline(screen, gfx_program, + &ctx->gfx_pipeline_state, + dinfo->mode); enum pipe_prim_type reduced_prim = u_reduced_prim(dinfo->mode); @@ -372,285 +153,160 @@ zink_draw_vbo(struct pipe_context *pctx, unsigned index_offset = 0; struct pipe_resource *index_buffer = NULL; if (dinfo->index_size > 0) { - uint32_t restart_index = util_prim_restart_index_from_size(dinfo->index_size); - if ((dinfo->primitive_restart && (dinfo->restart_index != restart_index)) || - (!screen->info.have_EXT_index_type_uint8 && dinfo->index_size == 1)) { - util_translate_prim_restart_ib(pctx, dinfo, dindirect, &draws[0], &index_buffer); - need_index_buffer_unref = true; - } else { - if (dinfo->has_user_indices) { - if (!util_upload_index_buffer(pctx, dinfo, &draws[0], &index_buffer, &index_offset, 4)) { - debug_printf("util_upload_index_buffer() failed\n"); - return; - } - } else - index_buffer = dinfo->index.resource; - } + if (dinfo->has_user_indices) { + if (!util_upload_index_buffer(pctx, dinfo, &index_buffer, &index_offset, 4)) { + debug_printf("util_upload_index_buffer() failed\n"); + return; + } + } else + index_buffer = dinfo->index.resource; } - if (ctx->xfb_barrier) - zink_emit_xfb_counter_barrier(ctx); - - if (ctx->dirty_so_targets && ctx->num_so_targets) - zink_emit_stream_output_targets(pctx); - if (so_target) - zink_emit_xfb_vertex_input_barrier(ctx, zink_resource(so_target->base.buffer)); + VkWriteDescriptorSet wds[PIPE_SHADER_TYPES * PIPE_MAX_CONSTANT_BUFFERS + PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS]; + VkDescriptorBufferInfo buffer_infos[PIPE_SHADER_TYPES * PIPE_MAX_CONSTANT_BUFFERS]; + VkDescriptorImageInfo image_infos[PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS]; + int num_wds = 0, num_buffer_info = 0, num_image_info = 0; - barrier_vertex_buffers(ctx); - barrier_draw_buffers(ctx, dinfo, dindirect, index_buffer); + struct zink_resource *transitions[PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS]; + int num_transitions = 0; - for (int i = 0; i < ZINK_SHADER_COUNT; i++) { + for (int i = 0; i < ARRAY_SIZE(ctx->gfx_stages); i++) { struct zink_shader *shader = ctx->gfx_stages[i]; if (!shader) continue; - enum pipe_shader_type stage = pipe_shader_type_from_mesa(shader->nir->info.stage); - if (ctx->num_so_targets && - (stage == PIPE_SHADER_GEOMETRY || - (stage == PIPE_SHADER_TESS_EVAL && !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]) || - (stage == PIPE_SHADER_VERTEX && !ctx->gfx_stages[PIPE_SHADER_GEOMETRY] && !ctx->gfx_stages[PIPE_SHADER_TESS_EVAL]))) { - for (unsigned j = 0; j < ctx->num_so_targets; j++) { - struct zink_so_target *t = zink_so_target(ctx->so_targets[j]); - if (t) - t->stride = shader->streamout.so_info.stride[j] * sizeof(uint32_t); + + for (int j = 0; j < shader->num_bindings; j++) { + int index = shader->bindings[j].index; + if (shader->bindings[j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { + assert(ctx->ubos[i][index].buffer_size > 0); + assert(ctx->ubos[i][index].buffer_size <= screen->props.limits.maxUniformBufferRange); + assert(ctx->ubos[i][index].buffer); + struct zink_resource *res = zink_resource(ctx->ubos[i][index].buffer); + buffer_infos[num_buffer_info].buffer = res->buffer; + buffer_infos[num_buffer_info].offset = ctx->ubos[i][index].buffer_offset; + buffer_infos[num_buffer_info].range = ctx->ubos[i][index].buffer_size; + wds[num_wds].pBufferInfo = buffer_infos + num_buffer_info; + ++num_buffer_info; + } else { + struct pipe_sampler_view *psampler_view = ctx->image_views[i][index]; + assert(psampler_view); + struct zink_sampler_view *sampler_view = zink_sampler_view(psampler_view); + + struct zink_resource *res = zink_resource(psampler_view->texture); + VkImageLayout layout = res->layout; + if (layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL && + layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL && + layout != VK_IMAGE_LAYOUT_GENERAL) { + transitions[num_transitions++] = res; + layout = VK_IMAGE_LAYOUT_GENERAL; + } + image_infos[num_image_info].imageLayout = layout; + image_infos[num_image_info].imageView = sampler_view->image_view; + image_infos[num_image_info].sampler = ctx->samplers[i][index]; + wds[num_wds].pImageInfo = image_infos + num_image_info; + ++num_image_info; } - } - } - if (zink_program_has_descriptors(&gfx_program->base)) - zink_descriptors_update(ctx, screen, false); - - struct zink_batch *batch = zink_batch_rp(ctx); - VkViewport viewports[PIPE_MAX_VIEWPORTS]; - for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) { - VkViewport viewport = { - ctx->vp_state.viewport_states[i].translate[0] - ctx->vp_state.viewport_states[i].scale[0], - ctx->vp_state.viewport_states[i].translate[1] - ctx->vp_state.viewport_states[i].scale[1], - ctx->vp_state.viewport_states[i].scale[0] * 2, - ctx->vp_state.viewport_states[i].scale[1] * 2, - ctx->rast_state->base.clip_halfz ? - ctx->vp_state.viewport_states[i].translate[2] : - ctx->vp_state.viewport_states[i].translate[2] - ctx->vp_state.viewport_states[i].scale[2], - ctx->vp_state.viewport_states[i].translate[2] + ctx->vp_state.viewport_states[i].scale[2] - }; - viewports[i] = viewport; - } - if (screen->info.have_EXT_extended_dynamic_state) - screen->vk_CmdSetViewportWithCountEXT(batch->state->cmdbuf, ctx->vp_state.num_viewports, viewports); - else - vkCmdSetViewport(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, viewports); - VkRect2D scissors[PIPE_MAX_VIEWPORTS]; - if (ctx->rast_state->base.scissor) { - for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) { - scissors[i].offset.x = ctx->vp_state.scissor_states[i].minx; - scissors[i].offset.y = ctx->vp_state.scissor_states[i].miny; - scissors[i].extent.width = ctx->vp_state.scissor_states[i].maxx - ctx->vp_state.scissor_states[i].minx; - scissors[i].extent.height = ctx->vp_state.scissor_states[i].maxy - ctx->vp_state.scissor_states[i].miny; - } - } else { - for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) { - scissors[i].offset.x = 0; - scissors[i].offset.y = 0; - scissors[i].extent.width = ctx->fb_state.width; - scissors[i].extent.height = ctx->fb_state.height; + wds[num_wds].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + wds[num_wds].pNext = NULL; + wds[num_wds].dstBinding = shader->bindings[j].binding; + wds[num_wds].dstArrayElement = 0; + wds[num_wds].descriptorCount = 1; + wds[num_wds].descriptorType = shader->bindings[j].type; + ++num_wds; } } - if (screen->info.have_EXT_extended_dynamic_state) - screen->vk_CmdSetScissorWithCountEXT(batch->state->cmdbuf, ctx->vp_state.num_viewports, scissors); - else - vkCmdSetScissor(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, scissors); - if (line_width_needed(reduced_prim, rast_state->hw_state.polygon_mode)) { - if (screen->info.feats.features.wideLines || ctx->line_width == 1.0f) - vkCmdSetLineWidth(batch->state->cmdbuf, ctx->line_width); - else - debug_printf("BUG: wide lines not supported, needs fallback!"); - } + struct zink_batch *batch; + if (num_transitions > 0) { + batch = zink_batch_no_rp(ctx); - if (dsa_state->base.stencil[0].enabled) { - if (dsa_state->base.stencil[1].enabled) { - vkCmdSetStencilReference(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, - ctx->stencil_ref.ref_value[0]); - vkCmdSetStencilReference(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT, - ctx->stencil_ref.ref_value[1]); - } else - vkCmdSetStencilReference(batch->state->cmdbuf, - VK_STENCIL_FACE_FRONT_AND_BACK, - ctx->stencil_ref.ref_value[0]); + for (int i = 0; i < num_transitions; ++i) + zink_resource_barrier(batch->cmdbuf, transitions[i], + transitions[i]->aspect, + VK_IMAGE_LAYOUT_GENERAL); } - if (depth_bias) - vkCmdSetDepthBias(batch->state->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale); - else - vkCmdSetDepthBias(batch->state->cmdbuf, 0.0f, 0.0f, 0.0f); + batch = zink_batch_rp(ctx); - if (ctx->gfx_pipeline_state.blend_state->need_blend_constants) - vkCmdSetBlendConstants(batch->state->cmdbuf, ctx->blend_constants); + if (batch->descs_left < gfx_program->num_descriptors) { + ctx->base.flush(&ctx->base, NULL, 0); + batch = zink_batch_rp(ctx); + assert(batch->descs_left >= gfx_program->num_descriptors); + } + VkDescriptorSet desc_set = allocate_descriptor_set(screen, batch, + gfx_program); + assert(desc_set != VK_NULL_HANDLE); - VkPipeline pipeline = zink_get_gfx_pipeline(screen, gfx_program, - &ctx->gfx_pipeline_state, - dinfo->mode); - vkCmdBindPipeline(batch->state->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - - zink_bind_vertex_buffers(batch, ctx); + for (int i = 0; i < ARRAY_SIZE(ctx->gfx_stages); i++) { + struct zink_shader *shader = ctx->gfx_stages[i]; + if (!shader) + continue; - if (BITSET_TEST(ctx->gfx_stages[PIPE_SHADER_VERTEX]->nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX)) { - unsigned draw_mode_is_indexed = dinfo->index_size > 0; - vkCmdPushConstants(batch->state->cmdbuf, gfx_program->base.layout, VK_SHADER_STAGE_VERTEX_BIT, - offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed), sizeof(unsigned), - &draw_mode_is_indexed); - } - if (gfx_program->shaders[PIPE_SHADER_TESS_CTRL] && gfx_program->shaders[PIPE_SHADER_TESS_CTRL]->is_generated) - vkCmdPushConstants(batch->state->cmdbuf, gfx_program->base.layout, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, - offsetof(struct zink_gfx_push_constant, default_inner_level), sizeof(float) * 6, - &ctx->tess_levels[0]); - - zink_query_update_gs_states(ctx); - - if (ctx->num_so_targets) { - for (unsigned i = 0; i < ctx->num_so_targets; i++) { - struct zink_so_target *t = zink_so_target(ctx->so_targets[i]); - counter_buffers[i] = VK_NULL_HANDLE; - if (t) { - struct zink_resource *res = zink_resource(t->counter_buffer); - zink_batch_reference_resource_rw(batch, res, true); - if (t->counter_buffer_valid) { - counter_buffers[i] = res->obj->buffer; - counter_buffer_offsets[i] = t->counter_buffer_offset; - } + for (int j = 0; j < shader->num_bindings; j++) { + int index = shader->bindings[j].index; + if (shader->bindings[j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { + struct zink_resource *res = zink_resource(ctx->ubos[i][index].buffer); + zink_batch_reference_resoure(batch, res); + } else { + struct zink_sampler_view *sampler_view = zink_sampler_view(ctx->image_views[i][index]); + zink_batch_reference_sampler_view(batch, sampler_view); } } - screen->vk_CmdBeginTransformFeedbackEXT(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets); } - unsigned draw_id = dinfo->drawid; - if (dinfo->index_size > 0) { - VkIndexType index_type; - unsigned index_size = dinfo->index_size; - if (need_index_buffer_unref) - /* index buffer will have been promoted from uint8 to uint16 in this case */ - index_size = MAX2(index_size, 2); - switch (index_size) { - case 1: - assert(screen->info.have_EXT_index_type_uint8); - index_type = VK_INDEX_TYPE_UINT8_EXT; - break; - case 2: - index_type = VK_INDEX_TYPE_UINT16; - break; - case 4: - index_type = VK_INDEX_TYPE_UINT32; - break; - default: - unreachable("unknown index size!"); - } - struct zink_resource *res = zink_resource(index_buffer); - vkCmdBindIndexBuffer(batch->state->cmdbuf, res->obj->buffer, index_offset, index_type); - zink_batch_reference_resource_rw(batch, res, false); - if (dindirect && dindirect->buffer) { - assert(num_draws == 1); - update_drawid(ctx, draw_id); - struct zink_resource *indirect = zink_resource(dindirect->buffer); - zink_batch_reference_resource_rw(batch, indirect, false); - if (dindirect->indirect_draw_count) { - struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count); - zink_batch_reference_resource_rw(batch, indirect_draw_count, false); - screen->vk_CmdDrawIndexedIndirectCount(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, - indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset, - dindirect->draw_count, dindirect->stride); - } else - vkCmdDrawIndexedIndirect(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride); - } else { - for (unsigned i = 0; i < num_draws; i++) { - update_drawid(ctx, draw_id); - vkCmdDrawIndexed(batch->state->cmdbuf, - draws[i].count, dinfo->instance_count, - need_index_buffer_unref ? 0 : draws[i].start, dinfo->index_bias, dinfo->start_instance); - if (dinfo->increment_draw_id) - draw_id++; - } - } - } else { - if (so_target && screen->info.tf_props.transformFeedbackDraw) { - update_drawid(ctx, draw_id); - zink_batch_reference_resource_rw(batch, zink_resource(so_target->base.buffer), false); - zink_batch_reference_resource_rw(batch, zink_resource(so_target->counter_buffer), true); - screen->vk_CmdDrawIndirectByteCountEXT(batch->state->cmdbuf, dinfo->instance_count, dinfo->start_instance, - zink_resource(so_target->counter_buffer)->obj->buffer, so_target->counter_buffer_offset, 0, - MIN2(so_target->stride, screen->info.tf_props.maxTransformFeedbackBufferDataStride)); - } else if (dindirect && dindirect->buffer) { - assert(num_draws == 1); - update_drawid(ctx, draw_id); - struct zink_resource *indirect = zink_resource(dindirect->buffer); - zink_batch_reference_resource_rw(batch, indirect, false); - if (dindirect->indirect_draw_count) { - struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count); - zink_batch_reference_resource_rw(batch, indirect_draw_count, false); - screen->vk_CmdDrawIndirectCount(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, - indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset, - dindirect->draw_count, dindirect->stride); - } else - vkCmdDrawIndirect(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride); - } else { - for (unsigned i = 0; i < num_draws; i++) { - update_drawid(ctx, draw_id); - vkCmdDraw(batch->state->cmdbuf, draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance); - if (dinfo->increment_draw_id) - draw_id++; - } - } + vkCmdSetViewport(batch->cmdbuf, 0, ctx->num_viewports, ctx->viewports); + if (ctx->rast_state->base.scissor) + vkCmdSetScissor(batch->cmdbuf, 0, ctx->num_viewports, ctx->scissors); + else if (ctx->fb_state.width && ctx->fb_state.height) { + VkRect2D fb_scissor = {}; + fb_scissor.extent.width = ctx->fb_state.width; + fb_scissor.extent.height = ctx->fb_state.height; + vkCmdSetScissor(batch->cmdbuf, 0, 1, &fb_scissor); } - if (dinfo->index_size > 0 && (dinfo->has_user_indices || need_index_buffer_unref)) - pipe_resource_reference(&index_buffer, NULL); - - if (ctx->num_so_targets) { - for (unsigned i = 0; i < ctx->num_so_targets; i++) { - struct zink_so_target *t = zink_so_target(ctx->so_targets[i]); - if (t) { - counter_buffers[i] = zink_resource(t->counter_buffer)->obj->buffer; - counter_buffer_offsets[i] = t->counter_buffer_offset; - t->counter_buffer_valid = true; - } - } - screen->vk_CmdEndTransformFeedbackEXT(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets); + if (line_width_needed(reduced_prim, rast_state->hw_state.polygon_mode)) { + if (screen->feats.wideLines || ctx->line_width == 1.0f) + vkCmdSetLineWidth(batch->cmdbuf, ctx->line_width); + else + debug_printf("BUG: wide lines not supported, needs fallback!"); } - batch->has_work = true; -} -void -zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) -{ - struct zink_context *ctx = zink_context(pctx); - struct zink_screen *screen = zink_screen(pctx->screen); - struct zink_batch *batch = &ctx->batch; - - /* check memory usage and flush/stall as needed to avoid oom */ - zink_maybe_flush_or_stall(ctx); + vkCmdSetStencilReference(batch->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, ctx->stencil_ref.ref_value[0]); + vkCmdSetStencilReference(batch->cmdbuf, VK_STENCIL_FACE_BACK_BIT, ctx->stencil_ref.ref_value[1]); - struct zink_compute_program *comp_program = get_compute_program(ctx); - if (!comp_program) - return; - - zink_program_update_compute_pipeline_state(ctx, comp_program, info->block); - VkPipeline pipeline = zink_get_compute_pipeline(screen, comp_program, - &ctx->compute_pipeline_state); - - if (zink_program_has_descriptors(&comp_program->base)) - zink_descriptors_update(ctx, screen, true); + if (depth_bias) + vkCmdSetDepthBias(batch->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale); + else + vkCmdSetDepthBias(batch->cmdbuf, 0.0f, 0.0f, 0.0f); + if (ctx->gfx_pipeline_state.blend_state->need_blend_constants) + vkCmdSetBlendConstants(batch->cmdbuf, ctx->blend_constants); - vkCmdBindPipeline(batch->state->cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + if (num_wds > 0) { + for (int i = 0; i < num_wds; ++i) + wds[i].dstSet = desc_set; + vkUpdateDescriptorSets(screen->dev, num_wds, wds, 0, NULL); + } - if (BITSET_TEST(comp_program->shader->nir->info.system_values_read, SYSTEM_VALUE_WORK_DIM)) - vkCmdPushConstants(batch->state->cmdbuf, comp_program->base.layout, VK_SHADER_STAGE_COMPUTE_BIT, - offsetof(struct zink_cs_push_constant, work_dim), sizeof(uint32_t), - &info->work_dim); + vkCmdBindPipeline(batch->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + vkCmdBindDescriptorSets(batch->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, + gfx_program->layout, 0, 1, &desc_set, 0, NULL); + zink_bind_vertex_buffers(batch, ctx); - if (info->indirect) { - vkCmdDispatchIndirect(batch->state->cmdbuf, zink_resource(info->indirect)->obj->buffer, info->indirect_offset); - zink_batch_reference_resource_rw(batch, zink_resource(info->indirect), false); + if (dinfo->index_size > 0) { + assert(dinfo->index_size != 1); + VkIndexType index_type = dinfo->index_size == 2 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32; + struct zink_resource *res = zink_resource(index_buffer); + vkCmdBindIndexBuffer(batch->cmdbuf, res->buffer, index_offset, index_type); + zink_batch_reference_resoure(batch, res); + vkCmdDrawIndexed(batch->cmdbuf, + dinfo->count, dinfo->instance_count, + dinfo->start, dinfo->index_bias, dinfo->start_instance); } else - vkCmdDispatch(batch->state->cmdbuf, info->grid[0], info->grid[1], info->grid[2]); - batch->has_work = true; + vkCmdDraw(batch->cmdbuf, dinfo->count, dinfo->instance_count, dinfo->start, dinfo->start_instance); + + if (dinfo->index_size > 0 && dinfo->has_user_indices) + pipe_resource_reference(&index_buffer, NULL); } diff --git a/lib/mesa/src/intel/compiler/brw_gen_enum.h b/lib/mesa/src/intel/compiler/brw_gen_enum.h index 9af169b93..cd50f9c6f 100644 --- a/lib/mesa/src/intel/compiler/brw_gen_enum.h +++ b/lib/mesa/src/intel/compiler/brw_gen_enum.h @@ -25,40 +25,37 @@ #include "dev/gen_device_info.h" enum gen { - GFX4 = (1 << 0), - GFX45 = (1 << 1), - GFX5 = (1 << 2), - GFX6 = (1 << 3), - GFX7 = (1 << 4), - GFX75 = (1 << 5), - GFX8 = (1 << 6), - GFX9 = (1 << 7), - GFX10 = (1 << 8), - GFX11 = (1 << 9), - GFX12 = (1 << 10), - GFX125 = (1 << 11), - GFX_ALL = ~0 + GEN4 = (1 << 0), + GEN45 = (1 << 1), + GEN5 = (1 << 2), + GEN6 = (1 << 3), + GEN7 = (1 << 4), + GEN75 = (1 << 5), + GEN8 = (1 << 6), + GEN9 = (1 << 7), + GEN10 = (1 << 8), + GEN11 = (1 << 9), + GEN12 = (1 << 10), + GEN_ALL = ~0 }; -#define GFX_LT(gen) ((gen) - 1) -#define GFX_GE(gen) (~GFX_LT(gen)) -#define GFX_LE(gen) (GFX_LT(gen) | (gen)) +#define GEN_LT(gen) ((gen) - 1) +#define GEN_GE(gen) (~GEN_LT(gen)) +#define GEN_LE(gen) (GEN_LT(gen) | (gen)) static enum gen gen_from_devinfo(const struct gen_device_info *devinfo) { - switch (devinfo->verx10) { - case 40: return GFX4; - case 45: return GFX45; - case 50: return GFX5; - case 60: return GFX6; - case 70: return GFX7; - case 75: return GFX75; - case 80: return GFX8; - case 90: return GFX9; - case 110: return GFX11; - case 120: return GFX12; - case 125: return GFX125; + switch (devinfo->gen) { + case 4: return devinfo->is_g4x ? GEN45 : GEN4; + case 5: return GEN5; + case 6: return GEN6; + case 7: return devinfo->is_haswell ? GEN75 : GEN7; + case 8: return GEN8; + case 9: return GEN9; + case 10: return GEN10; + case 11: return GEN11; + case 12: return GEN12; default: unreachable("not reached"); } diff --git a/lib/mesa/src/intel/perf/gen_perf_private.h b/lib/mesa/src/intel/perf/gen_perf_private.h index acca9a2b4..ac222d537 100644 --- a/lib/mesa/src/intel/perf/gen_perf_private.h +++ b/lib/mesa/src/intel/perf/gen_perf_private.h @@ -31,11 +31,6 @@ static inline uint64_t to_user_pointer(void *ptr) return (uintptr_t) ptr; } -static inline uint64_t to_const_user_pointer(const void *ptr) -{ - return (uintptr_t) ptr; -} - static inline void gen_perf_query_add_stat_reg(struct gen_perf_query_info *query, uint32_t reg, uint32_t numerator, uint32_t denominator, @@ -46,7 +41,7 @@ gen_perf_query_add_stat_reg(struct gen_perf_query_info *query, uint32_t reg, assert(query->n_counters < query->max_counters); counter = &query->counters[query->n_counters]; - counter->name = counter->symbol_name = name; + counter->name = name; counter->desc = description; counter->type = GEN_PERF_COUNTER_TYPE_RAW; counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64; @@ -76,8 +71,6 @@ gen_perf_append_query_info(struct gen_perf_config *perf, int max_counters) query = &perf->queries[perf->n_queries - 1]; memset(query, 0, sizeof(*query)); - query->perf = perf; - if (max_counters > 0) { query->max_counters = max_counters; query->counters = diff --git a/lib/mesa/src/intel/perf/gen_perf_query.c b/lib/mesa/src/intel/perf/gen_perf_query.c index 10f9b9709..b9744913b 100644 --- a/lib/mesa/src/intel/perf/gen_perf_query.c +++ b/lib/mesa/src/intel/perf/gen_perf_query.c @@ -23,7 +23,7 @@ #include <unistd.h> -#include "common/intel_gem.h" +#include "common/gen_gem.h" #include "dev/gen_debug.h" #include "dev/gen_device_info.h" @@ -36,29 +36,13 @@ #include "drm-uapi/i915_drm.h" -#include "util/compiler.h" #include "util/u_math.h" #define FILE_DEBUG_FLAG DEBUG_PERFMON - -#define MI_RPC_BO_SIZE (4096) -#define MI_FREQ_OFFSET_BYTES (256) -#define MI_PERF_COUNTERS_OFFSET_BYTES (260) - -#define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1)) - -/* Align to 64bytes, requirement for OA report write address. */ -#define TOTAL_QUERY_DATA_SIZE \ - ALIGN(256 /* OA report */ + \ - 4 /* freq register */ + \ - 8 + 8 /* perf counter 1 & 2 */, \ - 64) - - -static uint32_t field_offset(bool end, uint32_t offset) -{ - return (end ? TOTAL_QUERY_DATA_SIZE : 0) + offset; -} +#define MI_RPC_BO_SIZE 4096 +#define MI_FREQ_START_OFFSET_BYTES (3072) +#define MI_RPC_BO_END_OFFSET_BYTES (MI_RPC_BO_SIZE / 2) +#define MI_FREQ_END_OFFSET_BYTES (3076) #define MAP_READ (1 << 0) #define MAP_WRITE (1 << 1) @@ -235,6 +219,11 @@ struct gen_perf_query_object bool results_accumulated; /** + * Frequency of the GT at begin and end of the query. + */ + uint64_t gt_frequency[2]; + + /** * Accumulated OA results between begin and end of the query. */ struct gen_perf_query_result result; @@ -253,7 +242,6 @@ struct gen_perf_query_object struct gen_perf_context { struct gen_perf_config *perf; - void * mem_ctx; /* ralloc context */ void * ctx; /* driver context (eg, brw_context) */ void * bufmgr; const struct gen_device_info *devinfo; @@ -320,7 +308,7 @@ static bool inc_n_users(struct gen_perf_context *perf_ctx) { if (perf_ctx->n_oa_users == 0 && - intel_ioctl(perf_ctx->oa_stream_fd, I915_PERF_IOCTL_ENABLE, 0) < 0) + gen_ioctl(perf_ctx->oa_stream_fd, I915_PERF_IOCTL_ENABLE, 0) < 0) { return false; } @@ -339,7 +327,7 @@ dec_n_users(struct gen_perf_context *perf_ctx) */ --perf_ctx->n_oa_users; if (perf_ctx->n_oa_users == 0 && - intel_ioctl(perf_ctx->oa_stream_fd, I915_PERF_IOCTL_DISABLE, 0) < 0) + gen_ioctl(perf_ctx->oa_stream_fd, I915_PERF_IOCTL_DISABLE, 0) < 0) { DBG("WARNING: Error disabling gen perf stream: %m\n"); } @@ -360,6 +348,8 @@ gen_perf_close(struct gen_perf_context *perfquery, } } +#define NUM_PERF_PROPERTIES(array) (ARRAY_SIZE(array) / 2) + static bool gen_perf_open(struct gen_perf_context *perf_ctx, int metrics_set_id, @@ -368,43 +358,31 @@ gen_perf_open(struct gen_perf_context *perf_ctx, int drm_fd, uint32_t ctx_id) { - uint64_t properties[DRM_I915_PERF_PROP_MAX * 2]; - uint32_t p = 0; - - /* Single context sampling */ - properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE; - properties[p++] = ctx_id; - - /* Include OA reports in samples */ - properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA; - properties[p++] = true; + uint64_t properties[] = { + /* Single context sampling */ + DRM_I915_PERF_PROP_CTX_HANDLE, ctx_id, - /* OA unit configuration */ - properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET; - properties[p++] = metrics_set_id; + /* Include OA reports in samples */ + DRM_I915_PERF_PROP_SAMPLE_OA, true, - properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT; - properties[p++] = report_format; - - properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT; - properties[p++] = period_exponent; - - /* SSEU configuration */ - if (gen_perf_has_global_sseu(perf_ctx->perf)) { - properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU; - properties[p++] = to_user_pointer(&perf_ctx->perf->sseu); - } - - assert(p <= ARRAY_SIZE(properties)); + /* OA unit configuration */ + DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, + DRM_I915_PERF_PROP_OA_FORMAT, report_format, + DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, + /* SSEU configuration */ + DRM_I915_PERF_PROP_GLOBAL_SSEU, to_user_pointer(&perf_ctx->perf->sseu), + }; struct drm_i915_perf_open_param param = { .flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED, - .num_properties = p / 2, + .num_properties = perf_ctx->perf->i915_perf_version >= 4 ? + NUM_PERF_PROPERTIES(properties) : + NUM_PERF_PROPERTIES(properties) - 1, .properties_ptr = (uintptr_t) properties, }; - int fd = intel_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m); + int fd = gen_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m); if (fd == -1) { DBG("Error opening gen perf OA stream: %m\n"); return false; @@ -445,7 +423,7 @@ get_metric_id(struct gen_perf_config *perf, if (!gen_perf_load_metric_id(perf, query->guid, &raw_query->oa_metrics_set_id)) { DBG("Unable to read query guid=%s ID, falling back to test config\n", query->guid); - raw_query->oa_metrics_set_id = perf->fallback_raw_oa_metric; + raw_query->oa_metrics_set_id = 1ULL; } else { DBG("Raw query '%s'guid=%s loaded ID: %"PRIu64"\n", query->name, query->guid, query->oa_metrics_set_id); @@ -570,7 +548,6 @@ gen_perf_config(struct gen_perf_context *ctx) void gen_perf_init_context(struct gen_perf_context *perf_ctx, struct gen_perf_config *perf_cfg, - void * mem_ctx, /* ralloc context */ void * ctx, /* driver context (eg, brw_context) */ void * bufmgr, /* eg brw_bufmgr */ const struct gen_device_info *devinfo, @@ -578,7 +555,6 @@ gen_perf_init_context(struct gen_perf_context *perf_ctx, int drm_fd) { perf_ctx->perf = perf_cfg; - perf_ctx->mem_ctx = mem_ctx; perf_ctx->ctx = ctx; perf_ctx->bufmgr = bufmgr; perf_ctx->drm_fd = drm_fd; @@ -586,7 +562,7 @@ gen_perf_init_context(struct gen_perf_context *perf_ctx, perf_ctx->devinfo = devinfo; perf_ctx->unaccumulated = - ralloc_array(mem_ctx, struct gen_perf_query_object *, 2); + ralloc_array(ctx, struct gen_perf_query_object *, 2); perf_ctx->unaccumulated_elements = 0; perf_ctx->unaccumulated_array_size = 2; @@ -621,7 +597,7 @@ add_to_unaccumulated_query_list(struct gen_perf_context *perf_ctx, { perf_ctx->unaccumulated_array_size *= 1.5; perf_ctx->unaccumulated = - reralloc(perf_ctx->mem_ctx, perf_ctx->unaccumulated, + reralloc(perf_ctx->ctx, perf_ctx->unaccumulated, struct gen_perf_query_object *, perf_ctx->unaccumulated_array_size); } @@ -649,42 +625,22 @@ snapshot_statistics_registers(struct gen_perf_context *ctx, perf->vtbl.store_register_mem(ctx->ctx, obj->pipeline_stats.bo, counter->pipeline_stat.reg, 8, - offset_in_bytes + counter->offset); + offset_in_bytes + i * sizeof(uint64_t)); } } static void -snapshot_query_layout(struct gen_perf_context *perf_ctx, - struct gen_perf_query_object *query, - bool end_snapshot) +snapshot_freq_register(struct gen_perf_context *ctx, + struct gen_perf_query_object *query, + uint32_t bo_offset) { - struct gen_perf_config *perf_cfg = perf_ctx->perf; - const struct gen_perf_query_field_layout *layout = &perf_cfg->query_layout; - uint32_t offset = end_snapshot ? align(layout->size, layout->alignment) : 0; - - for (uint32_t f = 0; f < layout->n_fields; f++) { - const struct gen_perf_query_field *field = - &layout->fields[end_snapshot ? f : (layout->n_fields - 1 - f)]; + struct gen_perf_config *perf = ctx->perf; + const struct gen_device_info *devinfo = ctx->devinfo; - switch (field->type) { - case GEN_PERF_QUERY_FIELD_TYPE_MI_RPC: - perf_cfg->vtbl.emit_mi_report_perf_count(perf_ctx->ctx, query->oa.bo, - offset + field->location, - query->oa.begin_report_id + - (end_snapshot ? 1 : 0)); - break; - case GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT: - case GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT: - case GEN_PERF_QUERY_FIELD_TYPE_SRM_OA_B: - case GEN_PERF_QUERY_FIELD_TYPE_SRM_OA_C: - perf_cfg->vtbl.store_register_mem(perf_ctx->ctx, query->oa.bo, - field->mmio_offset, field->size, - offset + field->location); - break; - default: - unreachable("Invalid field type"); - } - } + if (devinfo->gen == 8 && !devinfo->is_cherryview) + perf->vtbl.store_register_mem(ctx->ctx, query->oa.bo, GEN7_RPSTAT1, 4, bo_offset); + else if (devinfo->gen >= 9) + perf->vtbl.store_register_mem(ctx->ctx, query->oa.bo, GEN9_RPSTAT0, 4, bo_offset); } bool @@ -764,8 +720,8 @@ gen_perf_begin_query(struct gen_perf_context *perf_ctx, /* The period_exponent gives a sampling period as follows: * sample_period = timestamp_period * 2^(period_exponent + 1) * - * The timestamps increments every 80ns (HSW), ~52ns (GFX9LP) or - * ~83ns (GFX8/9). + * The timestamps increments every 80ns (HSW), ~52ns (GEN9LP) or + * ~83ns (GEN8/9). * * The counter overflow period is derived from the EuActive counter * which reads a counter that increments by the number of clock @@ -781,7 +737,7 @@ gen_perf_begin_query(struct gen_perf_context *perf_ctx, */ int a_counter_in_bits = 32; - if (devinfo->ver >= 8) + if (devinfo->gen >= 8) a_counter_in_bits = 40; uint64_t overflow_period = pow(2, a_counter_in_bits) / (perf_cfg->sys_vars.n_eus * @@ -845,7 +801,10 @@ gen_perf_begin_query(struct gen_perf_context *perf_ctx, query->oa.begin_report_id = perf_ctx->next_query_start_report_id; perf_ctx->next_query_start_report_id += 2; - snapshot_query_layout(perf_ctx, query, false /* end_snapshot */); + /* Take a starting OA counter snapshot. */ + perf_cfg->vtbl.emit_mi_report_perf_count(perf_ctx->ctx, query->oa.bo, 0, + query->oa.begin_report_id); + snapshot_freq_register(perf_ctx, query, MI_FREQ_START_OFFSET_BYTES); ++perf_ctx->n_active_oa_queries; @@ -921,8 +880,13 @@ gen_perf_end_query(struct gen_perf_context *perf_ctx, * from perf. In this case we mustn't try and emit a closing * MI_RPC command in case the OA unit has already been disabled */ - if (!query->oa.results_accumulated) - snapshot_query_layout(perf_ctx, query, true /* end_snapshot */); + if (!query->oa.results_accumulated) { + /* Take an ending OA counter snapshot. */ + snapshot_freq_register(perf_ctx, query, MI_FREQ_END_OFFSET_BYTES); + perf_cfg->vtbl.emit_mi_report_perf_count(perf_ctx->ctx, query->oa.bo, + MI_RPC_BO_END_OFFSET_BYTES, + query->oa.begin_report_id + 1); + } --perf_ctx->n_active_oa_queries; @@ -974,24 +938,20 @@ read_oa_samples_until(struct gen_perf_context *perf_ctx, if (len <= 0) { exec_list_push_tail(&perf_ctx->free_sample_buffers, &buf->link); - if (len == 0) { + if (len < 0) { + if (errno == EAGAIN) { + return ((last_timestamp - start_timestamp) < INT32_MAX && + (last_timestamp - start_timestamp) >= + (end_timestamp - start_timestamp)) ? + OA_READ_STATUS_FINISHED : + OA_READ_STATUS_UNFINISHED; + } else { + DBG("Error reading i915 perf samples: %m\n"); + } + } else DBG("Spurious EOF reading i915 perf samples\n"); - return OA_READ_STATUS_ERROR; - } - - if (errno != EAGAIN) { - DBG("Error reading i915 perf samples: %m\n"); - return OA_READ_STATUS_ERROR; - } - - if ((last_timestamp - start_timestamp) >= INT32_MAX) - return OA_READ_STATUS_UNFINISHED; - - if ((last_timestamp - start_timestamp) < - (end_timestamp - start_timestamp)) - return OA_READ_STATUS_UNFINISHED; - return OA_READ_STATUS_FINISHED; + return OA_READ_STATUS_ERROR; } buf->len = len; @@ -1041,8 +1001,8 @@ read_oa_samples_for_query(struct gen_perf_context *perf_ctx, if (query->oa.map == NULL) query->oa.map = perf_cfg->vtbl.bo_map(perf_ctx->ctx, query->oa.bo, MAP_READ); - start = last = query->oa.map + field_offset(false, 0); - end = query->oa.map + field_offset(true, 0); + start = last = query->oa.map; + end = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES; if (start[0] != query->oa.begin_report_id) { DBG("Spurious start report id=%"PRIu32"\n", start[0]); @@ -1056,7 +1016,8 @@ read_oa_samples_for_query(struct gen_perf_context *perf_ctx, /* Read the reports until the end timestamp. */ switch (read_oa_samples_until(perf_ctx, start[1], end[1])) { case OA_READ_STATUS_ERROR: - FALLTHROUGH; /* Let accumulate_oa_reports() deal with the error. */ + /* Fallthrough and let accumulate_oa_reports() deal with the + * error. */ case OA_READ_STATUS_FINISHED: return true; case OA_READ_STATUS_UNFINISHED: @@ -1194,8 +1155,8 @@ static bool oa_report_ctx_id_valid(const struct gen_device_info *devinfo, const uint32_t *report) { - assert(devinfo->ver >= 8); - if (devinfo->ver == 8) + assert(devinfo->gen >= 8); + if (devinfo->gen == 8) return (report[0] & (1 << 25)) != 0; return (report[0] & (1 << 16)) != 0; } @@ -1213,7 +1174,7 @@ oa_report_ctx_id_valid(const struct gen_device_info *devinfo, * * These periodic snapshots help to ensure we handle counter overflow * correctly by being frequent enough to ensure we don't miss multiple - * overflows of a counter between snapshots. For Gfx8+ the i915 perf + * overflows of a counter between snapshots. For Gen8+ the i915 perf * snapshots provide the extra context-switch reports that let us * subtract out the progress of counters associated with other * contexts running on the system. @@ -1232,8 +1193,8 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx, assert(query->oa.map != NULL); - start = last = query->oa.map + field_offset(false, 0); - end = query->oa.map + field_offset(true, 0); + start = last = query->oa.map; + end = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES; if (start[0] != query->oa.begin_report_id) { DBG("Spurious start report id=%"PRIu32"\n", start[0]); @@ -1244,10 +1205,10 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx, goto error; } - /* On Gfx12+ OA reports are sourced from per context counters, so we don't + /* On Gen12+ OA reports are sourced from per context counters, so we don't * ever have to look at the global OA buffer. Yey \o/ */ - if (perf_ctx->devinfo->ver >= 12) { + if (perf_ctx->devinfo->gen >= 12) { last = start; goto end; } @@ -1300,7 +1261,7 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx, goto end; } - /* For Gfx8+ since the counters continue while other + /* For Gen8+ since the counters continue while other * contexts are running we need to discount any unrelated * deltas. The hardware automatically generates a report * on context switch which gives us a new reference point @@ -1309,7 +1270,7 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx, * For Haswell we can rely on the HW to stop the progress * of OA counters while any other context is acctive. */ - if (devinfo->ver >= 8) { + if (devinfo->gen >= 8) { /* Consider that the current report matches our context only if * the report says the report ID is valid. */ @@ -1339,7 +1300,6 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx, if (add) { gen_perf_query_result_accumulate(&query->oa.result, query->queryinfo, - devinfo, last, report); } else { /* We're not adding the delta because we've identified it's not @@ -1368,7 +1328,7 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx, end: gen_perf_query_result_accumulate(&query->oa.result, query->queryinfo, - devinfo, last, end); + last, end); query->oa.results_accumulated = true; drop_from_unaccumulated_query_list(perf_ctx, query); @@ -1431,6 +1391,38 @@ gen_perf_delete_query(struct gen_perf_context *perf_ctx, free(query); } +#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) + +static void +read_gt_frequency(struct gen_perf_context *perf_ctx, + struct gen_perf_query_object *obj) +{ + const struct gen_device_info *devinfo = perf_ctx->devinfo; + uint32_t start = *((uint32_t *)(obj->oa.map + MI_FREQ_START_OFFSET_BYTES)), + end = *((uint32_t *)(obj->oa.map + MI_FREQ_END_OFFSET_BYTES)); + + switch (devinfo->gen) { + case 7: + case 8: + obj->oa.gt_frequency[0] = GET_FIELD(start, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL; + obj->oa.gt_frequency[1] = GET_FIELD(end, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL; + break; + case 9: + case 10: + case 11: + case 12: + obj->oa.gt_frequency[0] = GET_FIELD(start, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL; + obj->oa.gt_frequency[1] = GET_FIELD(end, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL; + break; + default: + unreachable("unexpected gen"); + } + + /* Put the numbers into Hz. */ + obj->oa.gt_frequency[0] *= 1000000ULL; + obj->oa.gt_frequency[1] *= 1000000ULL; +} + static int get_oa_counter_data(struct gen_perf_context *perf_ctx, struct gen_perf_query_object *query, @@ -1454,21 +1446,19 @@ get_oa_counter_data(struct gen_perf_context *perf_ctx, out_uint64 = (uint64_t *)(data + counter->offset); *out_uint64 = counter->oa_counter_read_uint64(perf_cfg, queryinfo, - &query->oa.result); + query->oa.result.accumulator); break; case GEN_PERF_COUNTER_DATA_TYPE_FLOAT: out_float = (float *)(data + counter->offset); *out_float = counter->oa_counter_read_float(perf_cfg, queryinfo, - &query->oa.result); + query->oa.result.accumulator); break; default: /* So far we aren't using uint32, double or bool32... */ unreachable("unexpected counter data type"); } - - if (counter->offset + counter_size > written) - written = counter->offset + counter_size; + written = counter->offset + counter_size; } } @@ -1535,14 +1525,13 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx, while (!read_oa_samples_for_query(perf_ctx, query, current_batch)) ; + read_gt_frequency(perf_ctx, query); uint32_t *begin_report = query->oa.map; - uint32_t *end_report = query->oa.map + perf_cfg->query_layout.size; - gen_perf_query_result_accumulate_fields(&query->oa.result, - query->queryinfo, - perf_ctx->devinfo, - begin_report, - end_report, - true /* no_oa_accumulate */); + uint32_t *end_report = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES; + gen_perf_query_result_read_frequencies(&query->oa.result, + perf_ctx->devinfo, + begin_report, + end_report); accumulate_oa_reports(perf_ctx, query); assert(query->oa.results_accumulated); @@ -1555,8 +1544,9 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx, const struct gen_device_info *devinfo = perf_ctx->devinfo; written = gen_perf_query_result_write_mdapi((uint8_t *)data, data_size, - devinfo, query->queryinfo, - &query->oa.result); + devinfo, &query->oa.result, + query->oa.gt_frequency[0], + query->oa.gt_frequency[1]); } break; diff --git a/lib/mesa/src/intel/perf/gen_perf_query.h b/lib/mesa/src/intel/perf/gen_perf_query.h index b029e01d0..d064a5d06 100644 --- a/lib/mesa/src/intel/perf/gen_perf_query.h +++ b/lib/mesa/src/intel/perf/gen_perf_query.h @@ -36,7 +36,6 @@ struct gen_perf_context *gen_perf_new_context(void *parent); void gen_perf_init_context(struct gen_perf_context *perf_ctx, struct gen_perf_config *perf_cfg, - void * mem_ctx, /* ralloc context */ void * ctx, /* driver context (eg, brw_context) */ void * bufmgr, /* eg brw_bufmgr */ const struct gen_device_info *devinfo, @@ -45,6 +44,15 @@ void gen_perf_init_context(struct gen_perf_context *perf_ctx, const struct gen_perf_query_info* gen_perf_query_info(const struct gen_perf_query_object *); + +void gen_perf_init_context(struct gen_perf_context *perf_ctx, + struct gen_perf_config *perf_cfg, + void * ctx, /* driver context (eg, brw_context) */ + void * bufmgr, /* eg brw_bufmgr */ + const struct gen_device_info *devinfo, + uint32_t hw_ctx, + int drm_fd); + struct gen_perf_config *gen_perf_config(struct gen_perf_context *ctx); int gen_perf_active_queries(struct gen_perf_context *perf_ctx, diff --git a/lib/mesa/src/intel/perf/gen_perf_regs.h b/lib/mesa/src/intel/perf/gen_perf_regs.h index 67e7ece41..1b54fe29d 100644 --- a/lib/mesa/src/intel/perf/gen_perf_regs.h +++ b/lib/mesa/src/intel/perf/gen_perf_regs.h @@ -27,50 +27,17 @@ #define INTEL_MASK(high, low) (((1u<<((high)-(low)+1))-1)<<(low)) /* GT core frequency counters */ -#define GFX7_RPSTAT1 0xA01C -#define GFX7_RPSTAT1_CURR_GT_FREQ_SHIFT 7 -#define GFX7_RPSTAT1_CURR_GT_FREQ_MASK INTEL_MASK(13, 7) -#define GFX7_RPSTAT1_PREV_GT_FREQ_SHIFT 0 -#define GFX7_RPSTAT1_PREV_GT_FREQ_MASK INTEL_MASK(6, 0) - -#define GFX9_RPSTAT0 0xA01C -#define GFX9_RPSTAT0_CURR_GT_FREQ_SHIFT 23 -#define GFX9_RPSTAT0_CURR_GT_FREQ_MASK INTEL_MASK(31, 23) -#define GFX9_RPSTAT0_PREV_GT_FREQ_SHIFT 0 -#define GFX9_RPSTAT0_PREV_GT_FREQ_MASK INTEL_MASK(8, 0) - -/* Programmable perf 64bits counters (used for GTRequestQueueFull counter on - * gfx7-11) - */ -#define PERF_CNT_1_DW0 0x91b8 -#define PERF_CNT_2_DW0 0x91c0 -#define PERF_CNT_VALUE_MASK ((1ull << 44) - 1) - -/* Global OA perf counters */ -#define GFX7_N_OA_PERF_A32 44 -#define GFX7_OA_PERF_A32(idx) (0x2800 + (idx) * 4) - -#define GFX8_OA_PERF_TICKS 0x2910 -#define GFX8_N_OA_PERF_A64 32 -#define GFX8_N_OA_PERF_A32 4 -#define GFX8_N_OA_PERF_B32 8 -#define GFX8_N_OA_PERF_C32 8 -#define GFX8_OA_PERF_A64_LDW(idx) (0x2800 + (idx) * 8) -#define GFX8_OA_PERF_A64_UDW(idx) (0x2800 + (idx) * 8 + 4) -#define GFX8_OA_PERF_A32(idx) (0x2900 + (idx) * 4) -#define GFX8_OA_PERF_B32(idx) (0x2920 + (idx) * 4) -#define GFX8_OA_PERF_C32(idx) (0x2940 + (idx) * 4) - -#define GFX12_OAG_PERF_TICKS 0xda90 -#define GFX12_N_OAG_PERF_A64 32 -#define GFX12_N_OAG_PERF_A32 4 -#define GFX12_N_OAG_PERF_B32 8 -#define GFX12_N_OAG_PERF_C32 8 -#define GFX12_OAG_PERF_A64_LDW(idx) (0xd980 + (idx) * 8) -#define GFX12_OAG_PERF_A64_UDW(idx) (0xd980 + (idx) * 8 + 4) -#define GFX12_OAG_PERF_A32(idx) (0xda80 + (idx) * 4) -#define GFX12_OAG_PERF_B32(idx) (0xda94 + (idx) * 4) -#define GFX12_OAG_PERF_C32(idx) (0xdab4 + (idx) * 4) +#define GEN7_RPSTAT1 0xA01C +#define GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT 7 +#define GEN7_RPSTAT1_CURR_GT_FREQ_MASK INTEL_MASK(13, 7) +#define GEN7_RPSTAT1_PREV_GT_FREQ_SHIFT 0 +#define GEN7_RPSTAT1_PREV_GT_FREQ_MASK INTEL_MASK(6, 0) + +#define GEN9_RPSTAT0 0xA01C +#define GEN9_RPSTAT0_CURR_GT_FREQ_SHIFT 23 +#define GEN9_RPSTAT0_CURR_GT_FREQ_MASK INTEL_MASK(31, 23) +#define GEN9_RPSTAT0_PREV_GT_FREQ_SHIFT 0 +#define GEN9_RPSTAT0_PREV_GT_FREQ_MASK INTEL_MASK(8, 0) /* Pipeline statistic counters */ #define IA_VERTICES_COUNT 0x2310 @@ -87,9 +54,9 @@ #define PS_DEPTH_COUNT 0x2350 /* Stream-out counters */ -#define GFX6_SO_PRIM_STORAGE_NEEDED 0x2280 -#define GFX7_SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8) -#define GFX6_SO_NUM_PRIMS_WRITTEN 0x2288 -#define GFX7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8) +#define GEN6_SO_PRIM_STORAGE_NEEDED 0x2280 +#define GEN7_SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8) +#define GEN6_SO_NUM_PRIMS_WRITTEN 0x2288 +#define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8) #endif /* GEN_PERF_REGS_H */ |