author    Jonathan Gray <jsg@cvs.openbsd.org>  2016-12-11 08:37:01 +0000
committer Jonathan Gray <jsg@cvs.openbsd.org>  2016-12-11 08:37:01 +0000
commit    e4c140c4d4d00c16d99b15ee6677cbd2e3364031 (patch)
tree      96364a9f799341ef8dff3d1534f1de0eb3a559b1 /lib/mesa/src/amd/vulkan
parent    10010c14c68222d4056694bf3643ee969d18cd4f (diff)
Import Mesa 13.0.2
Diffstat (limited to 'lib/mesa/src/amd/vulkan')
44 files changed, 2853 insertions, 9190 deletions
diff --git a/lib/mesa/src/amd/vulkan/Makefile.am b/lib/mesa/src/amd/vulkan/Makefile.am
index a645432e7..c559a9503 100644
--- a/lib/mesa/src/amd/vulkan/Makefile.am
+++ b/lib/mesa/src/amd/vulkan/Makefile.am
@@ -21,7 +21,9 @@ include Makefile.sources
-noinst_HEADERS = \
+vulkan_includedir = $(includedir)/vulkan
+
+vulkan_include_HEADERS = \
 	$(top_srcdir)/include/vulkan/vk_platform.h \
 	$(top_srcdir)/include/vulkan/vulkan.h
 
@@ -30,12 +32,13 @@ lib_LTLIBRARIES = libvulkan_radeon.la
 # The gallium includes are for the util/u_math.h include from main/macros.h
 AM_CPPFLAGS = \
+	$(AMDGPU_CFLAGS) \
+	$(VALGRIND_CFLAGS) \
+	$(DEFINES) \
 	-I$(top_srcdir)/include \
 	-I$(top_builddir)/src \
 	-I$(top_srcdir)/src \
 	-I$(top_srcdir)/src/vulkan/wsi \
-	-I$(top_builddir)/src/vulkan/util \
-	-I$(top_srcdir)/src/vulkan/util \
 	-I$(top_srcdir)/src/amd \
 	-I$(top_srcdir)/src/amd/common \
 	-I$(top_builddir)/src/compiler \
@@ -45,10 +48,7 @@ AM_CPPFLAGS = \
 	-I$(top_srcdir)/src/mesa \
 	-I$(top_srcdir)/src/mesa/drivers/dri/common \
 	-I$(top_srcdir)/src/gallium/auxiliary \
-	-I$(top_srcdir)/src/gallium/include \
-	$(AMDGPU_CFLAGS) \
-	$(VALGRIND_CFLAGS) \
-	$(DEFINES)
+	-I$(top_srcdir)/src/gallium/include
 
 AM_CFLAGS = \
 	$(VISIBILITY_CFLAGS) \
@@ -59,22 +59,8 @@ VULKAN_SOURCES = \
 	$(VULKAN_GENERATED_FILES) \
 	$(VULKAN_FILES)
 
-VULKAN_LIB_DEPS = \
-	libvulkan_common.la \
-	$(top_builddir)/src/vulkan/libvulkan_util.la \
-	$(top_builddir)/src/vulkan/libvulkan_wsi.la \
-	$(top_builddir)/src/amd/common/libamd_common.la \
-	$(top_builddir)/src/amd/addrlib/libamdgpu_addrlib.la \
-	$(top_builddir)/src/compiler/nir/libnir.la \
-	$(top_builddir)/src/util/libmesautil.la \
-	$(LLVM_LIBS) \
-	$(LIBELF_LIBS) \
-	$(PTHREAD_LIBS) \
-	$(AMDGPU_LIBS) \
-	$(LIBDRM_LIBS) \
-	$(PTHREAD_LIBS) \
-	$(DLOPEN_LIBS) \
-	-lm
+VULKAN_LIB_DEPS =
+
 
 if HAVE_PLATFORM_X11
 AM_CPPFLAGS += \
@@ -84,37 +70,61 @@ AM_CPPFLAGS += \
 
 VULKAN_SOURCES += $(VULKAN_WSI_X11_FILES)
 
-VULKAN_LIB_DEPS += $(XCB_DRI3_LIBS)
+# FIXME: Use pkg-config for X11-xcb ldflags.
+VULKAN_LIB_DEPS += $(XCB_DRI3_LIBS) -lX11-xcb
 endif
 
 if HAVE_PLATFORM_WAYLAND
 AM_CPPFLAGS += \
+	-I$(top_builddir)/src/egl/wayland/wayland-drm \
+	-I$(top_srcdir)/src/egl/wayland/wayland-drm \
 	$(WAYLAND_CFLAGS) \
 	-DVK_USE_PLATFORM_WAYLAND_KHR
 
 VULKAN_SOURCES += $(VULKAN_WSI_WAYLAND_FILES)
 
 VULKAN_LIB_DEPS += \
+	$(top_builddir)/src/egl/wayland/wayland-drm/libwayland-drm.la \
 	$(WAYLAND_LIBS)
 endif
 
 noinst_LTLIBRARIES = libvulkan_common.la
 libvulkan_common_la_SOURCES = $(VULKAN_SOURCES)
 
+VULKAN_LIB_DEPS += \
+	libvulkan_common.la \
+	$(top_builddir)/src/vulkan/wsi/libvulkan_wsi.la \
+	$(top_builddir)/src/amd/common/libamd_common.la \
+	$(top_builddir)/src/amd/addrlib/libamdgpu_addrlib.la \
+	$(top_builddir)/src/compiler/nir/libnir.la \
+	$(top_builddir)/src/util/libmesautil.la \
+	$(LLVM_LIBS) \
+	$(LIBELF_LIBS) \
+	$(PTHREAD_LIBS) \
+	$(AMDGPU_LIBS) \
+	$(LIBDRM_LIBS) \
+	$(PTHREAD_LIBS) \
+	$(DLOPEN_LIBS) \
+	-lm
+
 nodist_EXTRA_libvulkan_radeon_la_SOURCES = dummy.cpp
 libvulkan_radeon_la_SOURCES = $(VULKAN_GEM_FILES)
 
-vulkan_api_xml = $(top_srcdir)/src/vulkan/registry/vk.xml
-
-radv_entrypoints.h : radv_entrypoints_gen.py $(vulkan_api_xml)
-	$(AM_V_GEN) cat $(vulkan_api_xml) |\
+radv_entrypoints.h : radv_entrypoints_gen.py $(vulkan_include_HEADERS)
+	$(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
 	$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py header > $@
 
-radv_entrypoints.c : radv_entrypoints_gen.py $(vulkan_api_xml)
-	$(AM_V_GEN) cat $(vulkan_api_xml) |\
+radv_entrypoints.c : radv_entrypoints_gen.py $(vulkan_include_HEADERS)
+	$(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
 	$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py code > $@
 
+.PHONY: radv_timestamp.h
+
+radv_timestamp.h:
+	@echo "Updating radv_timestamp.h"
+	$(AM_V_GEN) echo "#define RADV_TIMESTAMP \"$(TIMESTAMP_CMD)\"" > $@
+
 vk_format_table.c: vk_format_table.py \
 		   vk_format_parse.py \
 		   vk_format_layout.csv
diff --git a/lib/mesa/src/amd/vulkan/Makefile.sources b/lib/mesa/src/amd/vulkan/Makefile.sources
index 489695215..d163b9807 100644
--- a/lib/mesa/src/amd/vulkan/Makefile.sources
+++ b/lib/mesa/src/amd/vulkan/Makefile.sources
@@ -33,7 +33,6 @@ RADV_WS_AMDGPU_FILES := \
 VULKAN_FILES := \
 	radv_cmd_buffer.c \
 	radv_cs.h \
-	radv_debug.h \
 	radv_device.c \
 	radv_descriptor_set.c \
 	radv_descriptor_set.h \
@@ -73,5 +72,6 @@ VULKAN_WSI_X11_FILES := \
 
 VULKAN_GENERATED_FILES := \
 	radv_entrypoints.c \
-	radv_entrypoints.h
+	radv_entrypoints.h \
+	radv_timestamp.h
diff --git a/lib/mesa/src/amd/vulkan/radv_cmd_buffer.c b/lib/mesa/src/amd/vulkan/radv_cmd_buffer.c
index fd155411f..9517e7a13 100644
--- a/lib/mesa/src/amd/vulkan/radv_cmd_buffer.c
+++ b/lib/mesa/src/amd/vulkan/radv_cmd_buffer.c
@@ -32,15 +32,11 @@
 #include "vk_format.h"
 #include "radv_meta.h"
 
-#include "ac_debug.h"
-
 static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
 					 struct radv_image *image,
 					 VkImageLayout src_layout,
 					 VkImageLayout dst_layout,
-					 uint32_t src_family,
-					 uint32_t dst_family,
-					 const VkImageSubresourceRange *range,
+					 VkImageSubresourceRange range,
 					 VkImageAspectFlags pending_clears);
 
 const struct radv_dynamic_state default_dynamic_state = {
@@ -114,25 +110,6 @@ radv_dynamic_state_copy(struct radv_dynamic_state *dest,
 	dest->stencil_reference = src->stencil_reference;
 }
 
-bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer)
-{
-	return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
-		cmd_buffer->device->physical_device->rad_info.chip_class >= CIK;
-}
-
-enum ring_type radv_queue_family_to_ring(int f) {
-	switch (f) {
-	case RADV_QUEUE_GENERAL:
-		return RING_GFX;
-	case RADV_QUEUE_COMPUTE:
-		return RING_COMPUTE;
-	case RADV_QUEUE_TRANSFER:
-		return RING_DMA;
-	default:
-		unreachable("Unknown queue family");
-	}
-}
-
 static VkResult radv_create_cmd_buffer(
 	struct radv_device *                         device,
 	struct radv_cmd_pool *                       pool,
@@ -141,7 +118,7 @@ static VkResult radv_create_cmd_buffer(
 {
 	struct radv_cmd_buffer *cmd_buffer;
 	VkResult result;
-	unsigned ring;
+
 	cmd_buffer = vk_alloc(&pool->alloc, sizeof(*cmd_buffer), 8,
 			      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
 	if (cmd_buffer == NULL)
@@ -155,19 +132,14 @@ static VkResult radv_create_cmd_buffer(
 
 	if (pool) {
 		list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
-		cmd_buffer->queue_family_index = pool->queue_family_index;
-
 	} else {
 		/* Init the pool_link so we can safefly call list_del when we destroy
 		 * the command buffer
 		 */
 		list_inithead(&cmd_buffer->pool_link);
-		cmd_buffer->queue_family_index = RADV_QUEUE_GENERAL;
 	}
 
-	ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
-
-	cmd_buffer->cs = device->ws->cs_create(device->ws, ring);
+	cmd_buffer->cs = device->ws->cs_create(device->ws, RING_GFX);
 	if (!cmd_buffer->cs) {
 		result = VK_ERROR_OUT_OF_HOST_MEMORY;
 		goto fail;
@@ -187,54 +159,6 @@ fail:
 	return result;
 }
 
-static void
-radv_cmd_buffer_destroy(struct radv_cmd_buffer *cmd_buffer)
-{
-	list_del(&cmd_buffer->pool_link);
-
-	list_for_each_entry_safe(struct radv_cmd_buffer_upload, up,
-				 &cmd_buffer->upload.list, list) {
-		cmd_buffer->device->ws->buffer_destroy(up->upload_bo);
-		list_del(&up->list);
-		free(up);
-	}
-
-	if (cmd_buffer->upload.upload_bo)
-		cmd_buffer->device->ws->buffer_destroy(cmd_buffer->upload.upload_bo);
-	cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs);
-	free(cmd_buffer->push_descriptors.set.mapped_ptr);
-	vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
-}
-
-static void radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
-{
-
-	cmd_buffer->device->ws->cs_reset(cmd_buffer->cs);
-
-	list_for_each_entry_safe(struct radv_cmd_buffer_upload, up,
-				 &cmd_buffer->upload.list, list) {
-		cmd_buffer->device->ws->buffer_destroy(up->upload_bo);
-		list_del(&up->list);
-		free(up);
-	}
-
-	cmd_buffer->scratch_size_needed = 0;
-	cmd_buffer->compute_scratch_size_needed = 0;
-	cmd_buffer->esgs_ring_size_needed = 0;
-	cmd_buffer->gsvs_ring_size_needed = 0;
-	cmd_buffer->tess_rings_needed = false;
-	cmd_buffer->sample_positions_needed = false;
-
-	if (cmd_buffer->upload.upload_bo)
-		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
-						      cmd_buffer->upload.upload_bo, 8);
-	cmd_buffer->upload.offset = 0;
-
-	cmd_buffer->record_fail = false;
-
-	cmd_buffer->ring_offsets_idx = -1;
-}
-
 static bool
 radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer,
 				  uint64_t min_needed)
@@ -322,32 +246,6 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
 	return true;
 }
 
-void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
-{
-	struct radv_device *device = cmd_buffer->device;
-	struct radeon_winsys_cs *cs = cmd_buffer->cs;
-	uint64_t va;
-
-	if (!device->trace_bo)
-		return;
-
-	va = device->ws->buffer_get_va(device->trace_bo);
-
-	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 7);
-
-	++cmd_buffer->state.trace_id;
-	device->ws->cs_add_buffer(cs, device->trace_bo, 8);
-	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-	radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
-		    S_370_WR_CONFIRM(1) |
-		    S_370_ENGINE_SEL(V_370_ME));
-	radeon_emit(cs, va);
-	radeon_emit(cs, va >> 32);
-	radeon_emit(cs, cmd_buffer->state.trace_id);
-	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-	radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
-}
-
 static void
 radv_emit_graphics_blend_state(struct radv_cmd_buffer *cmd_buffer,
 			       struct radv_pipeline *pipeline)
@@ -378,58 +276,6 @@ static unsigned radv_pack_float_12p4(float x)
 	       x >= 4096 ? 0xffff : x * 16;
 }
 
-static uint32_t
-shader_stage_to_user_data_0(gl_shader_stage stage, bool has_gs, bool has_tess)
-{
-	switch (stage) {
-	case MESA_SHADER_FRAGMENT:
-		return R_00B030_SPI_SHADER_USER_DATA_PS_0;
-	case MESA_SHADER_VERTEX:
-		if (has_tess)
-			return R_00B530_SPI_SHADER_USER_DATA_LS_0;
-		else
-			return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : R_00B130_SPI_SHADER_USER_DATA_VS_0;
-	case MESA_SHADER_GEOMETRY:
-		return R_00B230_SPI_SHADER_USER_DATA_GS_0;
-	case MESA_SHADER_COMPUTE:
-		return R_00B900_COMPUTE_USER_DATA_0;
-	case MESA_SHADER_TESS_CTRL:
-		return R_00B430_SPI_SHADER_USER_DATA_HS_0;
-	case MESA_SHADER_TESS_EVAL:
-		if (has_gs)
-			return R_00B330_SPI_SHADER_USER_DATA_ES_0;
-		else
-			return R_00B130_SPI_SHADER_USER_DATA_VS_0;
-	default:
-		unreachable("unknown shader");
-	}
-}
-
-static struct ac_userdata_info *
-radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
-		      gl_shader_stage stage,
-		      int idx)
-{
-	return &pipeline->shaders[stage]->info.user_sgprs_locs.shader_data[idx];
-}
-
-static void
-radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer,
-			   struct radv_pipeline *pipeline,
-			   gl_shader_stage stage,
-			   int idx, uint64_t va)
-{
-	struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
-	uint32_t base_reg = shader_stage_to_user_data_0(stage, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
-	if (loc->sgpr_idx == -1)
-		return;
-	assert(loc->num_sgprs == 2);
-	assert(!loc->indirect);
-	radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 2);
-	radeon_emit(cmd_buffer->cs, va);
-	radeon_emit(cmd_buffer->cs, va >> 32);
-}
-
 static void
 radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
 			      struct radv_pipeline *pipeline)
@@ -442,9 +288,6 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
 	radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_mask[0]);
 	radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_mask[1]);
 
-	radeon_set_context_reg(cmd_buffer->cs, CM_R_028804_DB_EQAA, ms->db_eqaa);
-	radeon_set_context_reg(cmd_buffer->cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1);
-
 	if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)
 		return;
 
@@ -452,37 +295,41 @@
 	radeon_emit(cmd_buffer->cs, ms->pa_sc_line_cntl);
 	radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_config);
 
-	radv_cayman_emit_msaa_sample_locs(cmd_buffer->cs, num_samples);
+	radeon_set_context_reg(cmd_buffer->cs, CM_R_028804_DB_EQAA, ms->db_eqaa);
+	radeon_set_context_reg(cmd_buffer->cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1);
 
-	if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_positions) {
-		uint32_t offset;
-		struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_FRAGMENT, AC_UD_PS_SAMPLE_POS_OFFSET);
-		uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_FRAGMENT, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
-		if (loc->sgpr_idx == -1)
-			return;
-		assert(loc->num_sgprs == 1);
-		assert(!loc->indirect);
-		switch (num_samples) {
-		default:
-			offset = 0;
-			break;
-		case 2:
-			offset = 1;
-			break;
-		case 4:
-			offset = 3;
-			break;
-		case 8:
-			offset = 7;
-			break;
-		case 16:
-			offset = 15;
-			break;
-		}
+	radv_cayman_emit_msaa_sample_locs(cmd_buffer->cs, num_samples);
 
-		radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, offset);
-		cmd_buffer->sample_positions_needed = true;
+	uint32_t samples_offset;
+	void *samples_ptr;
+	void *src;
+	radv_cmd_buffer_upload_alloc(cmd_buffer, num_samples * 4 * 2, 256, &samples_offset,
+				     &samples_ptr);
+	switch (num_samples) {
+	case 1:
+		src = cmd_buffer->device->sample_locations_1x;
+		break;
+	case 2:
+		src = cmd_buffer->device->sample_locations_2x;
+		break;
+	case 4:
+		src = cmd_buffer->device->sample_locations_4x;
+		break;
+	case 8:
+		src = cmd_buffer->device->sample_locations_8x;
+		break;
+	case 16:
+		src = cmd_buffer->device->sample_locations_16x;
+		break;
 	}
+	memcpy(samples_ptr, src, num_samples * 4 * 2);
+
+	uint64_t va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
+	va += samples_offset;
+
+	radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 + AC_USERDATA_PS_SAMPLE_POS * 4, 2);
+	radeon_emit(cmd_buffer->cs, va);
+	radeon_emit(cmd_buffer->cs, va >> 32);
 }
 
 static void
@@ -498,8 +345,7 @@ radv_emit_graphics_raster_state(struct radv_cmd_buffer *cmd_buffer,
 			       raster->spi_interp_control);
 
 	radeon_set_context_reg_seq(cmd_buffer->cs, R_028A00_PA_SU_POINT_SIZE, 2);
-	unsigned tmp = (unsigned)(1.0 * 8.0);
-	radeon_emit(cmd_buffer->cs, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
+	radeon_emit(cmd_buffer->cs, 0);
 	radeon_emit(cmd_buffer->cs, S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) |
 		    S_028A04_MAX_SIZE(radv_pack_float_12p4(8192/2))); /* R_028A04_PA_SU_POINT_MINMAX */
 
@@ -511,39 +357,47 @@
 }
 
 static void
-radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
-		struct radv_pipeline *pipeline,
-		struct radv_shader_variant *shader,
-		struct ac_vs_output_info *outinfo)
+radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer,
+			struct radv_pipeline *pipeline)
 {
 	struct radeon_winsys *ws = cmd_buffer->device->ws;
-	uint64_t va = ws->buffer_get_va(shader->bo);
+	struct radv_shader_variant *vs;
+	uint64_t va;
 	unsigned export_count;
+	unsigned clip_dist_mask, cull_dist_mask, total_mask;
+
+	assert (pipeline->shaders[MESA_SHADER_VERTEX]);
+
+	vs = pipeline->shaders[MESA_SHADER_VERTEX];
+	va = ws->buffer_get_va(vs->bo);
+	ws->cs_add_buffer(cmd_buffer->cs, vs->bo, 8);
 
-	ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
+	clip_dist_mask = vs->info.vs.clip_dist_mask;
+	cull_dist_mask = vs->info.vs.cull_dist_mask;
+	total_mask = clip_dist_mask | cull_dist_mask;
+	radeon_set_context_reg(cmd_buffer->cs, R_028A40_VGT_GS_MODE, 0);
+	radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, 0);
 
-	export_count = MAX2(1, outinfo->param_exports);
+	export_count = MAX2(1, vs->info.vs.param_exports);
 	radeon_set_context_reg(cmd_buffer->cs, R_0286C4_SPI_VS_OUT_CONFIG,
 			       S_0286C4_VS_EXPORT_COUNT(export_count - 1));
-
 	radeon_set_context_reg(cmd_buffer->cs, R_02870C_SPI_SHADER_POS_FORMAT,
 			       S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
-			       S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ?
+			       S_02870C_POS1_EXPORT_FORMAT(vs->info.vs.pos_exports > 1 ?
 							   V_02870C_SPI_SHADER_4COMP :
 							   V_02870C_SPI_SHADER_NONE) |
-			       S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ?
+			       S_02870C_POS2_EXPORT_FORMAT(vs->info.vs.pos_exports > 2 ?
							   V_02870C_SPI_SHADER_4COMP :
							   V_02870C_SPI_SHADER_NONE) |
-			       S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ?
+			       S_02870C_POS3_EXPORT_FORMAT(vs->info.vs.pos_exports > 3 ?
							   V_02870C_SPI_SHADER_4COMP :
							   V_02870C_SPI_SHADER_NONE));
-
 	radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
 	radeon_emit(cmd_buffer->cs, va >> 8);
 	radeon_emit(cmd_buffer->cs, va >> 40);
-	radeon_emit(cmd_buffer->cs, shader->rsrc1);
-	radeon_emit(cmd_buffer->cs, shader->rsrc2);
+	radeon_emit(cmd_buffer->cs, vs->rsrc1);
+	radeon_emit(cmd_buffer->cs, vs->rsrc2);
 
 	radeon_set_context_reg(cmd_buffer->cs, R_028818_PA_CL_VTE_CNTL,
 			       S_028818_VTX_W0_FMT(1) |
@@ -551,236 +405,34 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
 			       S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
 			       S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
 
-	radeon_set_context_reg(cmd_buffer->cs, R_02881C_PA_CL_VS_OUT_CNTL,
-			       pipeline->graphics.pa_cl_vs_out_cntl);
-
-	radeon_set_context_reg(cmd_buffer->cs, R_028AB4_VGT_REUSE_OFF,
-			       S_028AB4_REUSE_OFF(outinfo->writes_viewport_index));
-}
-
-static void
-radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer,
-		struct radv_shader_variant *shader,
-		struct ac_es_output_info *outinfo)
-{
-	struct radeon_winsys *ws = cmd_buffer->device->ws;
-	uint64_t va = ws->buffer_get_va(shader->bo);
-
-	ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
-
-	radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
-			       outinfo->esgs_itemsize / 4);
-	radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
-	radeon_emit(cmd_buffer->cs, va >> 8);
-	radeon_emit(cmd_buffer->cs, va >> 40);
-	radeon_emit(cmd_buffer->cs, shader->rsrc1);
-	radeon_emit(cmd_buffer->cs, shader->rsrc2);
-}
-
-static void
-radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer,
-		struct radv_shader_variant *shader)
-{
-	struct radeon_winsys *ws = cmd_buffer->device->ws;
-	uint64_t va = ws->buffer_get_va(shader->bo);
-	uint32_t rsrc2 = shader->rsrc2;
-
-	ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
-
-	radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
-	radeon_emit(cmd_buffer->cs, va >> 8);
-	radeon_emit(cmd_buffer->cs, va >> 40);
-
-	rsrc2 |= S_00B52C_LDS_SIZE(cmd_buffer->state.pipeline->graphics.tess.lds_size);
-	if (cmd_buffer->device->physical_device->rad_info.chip_class == CIK &&
-	    cmd_buffer->device->physical_device->rad_info.family != CHIP_HAWAII)
-		radeon_set_sh_reg(cmd_buffer->cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2);
-
-	radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
-	radeon_emit(cmd_buffer->cs, shader->rsrc1);
-	radeon_emit(cmd_buffer->cs, rsrc2);
-}
-
-static void
-radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer,
-		struct radv_shader_variant *shader)
-{
-	struct radeon_winsys *ws = cmd_buffer->device->ws;
-	uint64_t va = ws->buffer_get_va(shader->bo);
-
-	ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
-
-	radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4);
-	radeon_emit(cmd_buffer->cs, va >> 8);
-	radeon_emit(cmd_buffer->cs, va >> 40);
-	radeon_emit(cmd_buffer->cs, shader->rsrc1);
-	radeon_emit(cmd_buffer->cs, shader->rsrc2);
-}
-
-static void
-radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer,
-			struct radv_pipeline *pipeline)
-{
-	struct radv_shader_variant *vs;
-
-	assert (pipeline->shaders[MESA_SHADER_VERTEX]);
-
-	vs = pipeline->shaders[MESA_SHADER_VERTEX];
-
-	if (vs->info.vs.as_ls)
-		radv_emit_hw_ls(cmd_buffer, vs);
-	else if (vs->info.vs.as_es)
-		radv_emit_hw_es(cmd_buffer, vs, &vs->info.vs.es_info);
-	else
-		radv_emit_hw_vs(cmd_buffer, pipeline, vs, &vs->info.vs.outinfo);
-
-	radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, 0);
-}
-
-
 static void
-radv_emit_tess_shaders(struct radv_cmd_buffer *cmd_buffer,
-		       struct radv_pipeline *pipeline)
-{
-	if (!radv_pipeline_has_tess(pipeline))
-		return;
-
-	struct radv_shader_variant *tes, *tcs;
-
-	tcs = pipeline->shaders[MESA_SHADER_TESS_CTRL];
-	tes = pipeline->shaders[MESA_SHADER_TESS_EVAL];
-
-	if (tes->info.tes.as_es)
-		radv_emit_hw_es(cmd_buffer, tes, &tes->info.tes.es_info);
-	else
-		radv_emit_hw_vs(cmd_buffer, pipeline, tes, &tes->info.tes.outinfo);
+			       S_02881C_USE_VTX_POINT_SIZE(vs->info.vs.writes_pointsize) |
+			       S_02881C_VS_OUT_MISC_VEC_ENA(vs->info.vs.writes_pointsize) |
+			       S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
+			       S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
+			       pipeline->graphics.raster.pa_cl_vs_out_cntl |
+			       cull_dist_mask << 8 |
+			       clip_dist_mask);
 
-	radv_emit_hw_hs(cmd_buffer, tcs);
-
-	radeon_set_context_reg(cmd_buffer->cs, R_028B6C_VGT_TF_PARAM,
-			       pipeline->graphics.tess.tf_param);
-
-	if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
-		radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2,
-					   pipeline->graphics.tess.ls_hs_config);
-	else
-		radeon_set_context_reg(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG,
-				       pipeline->graphics.tess.ls_hs_config);
-
-	struct ac_userdata_info *loc;
-
-	loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_CTRL, AC_UD_TCS_OFFCHIP_LAYOUT);
-	if (loc->sgpr_idx != -1) {
-		uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_TESS_CTRL, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
-		assert(loc->num_sgprs == 4);
-		assert(!loc->indirect);
-		radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 4);
-		radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.offchip_layout);
-		radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.tcs_out_offsets);
-		radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.tcs_out_layout |
-			    pipeline->graphics.tess.num_tcs_input_cp << 26);
-		radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.tcs_in_layout);
-	}
-
-	loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_EVAL, AC_UD_TES_OFFCHIP_LAYOUT);
-	if (loc->sgpr_idx != -1) {
-		uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_TESS_EVAL, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
-		assert(loc->num_sgprs == 1);
-		assert(!loc->indirect);
-
-		radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
-				  pipeline->graphics.tess.offchip_layout);
-	}
-
-	loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX, AC_UD_VS_LS_TCS_IN_LAYOUT);
-	if (loc->sgpr_idx != -1) {
-		uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
-		assert(loc->num_sgprs == 1);
-		assert(!loc->indirect);
-
-		radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
-				  pipeline->graphics.tess.tcs_in_layout);
-	}
 }
 
-static void
-radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer,
-			  struct radv_pipeline *pipeline)
-{
-	struct radeon_winsys *ws = cmd_buffer->device->ws;
-	struct radv_shader_variant *gs;
-	uint64_t va;
-
-	radeon_set_context_reg(cmd_buffer->cs, R_028A40_VGT_GS_MODE, pipeline->graphics.vgt_gs_mode);
-
-	gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
-	if (!gs)
-		return;
-
-	uint32_t gsvs_itemsize = gs->info.gs.max_gsvs_emit_size >> 2;
-
-	radeon_set_context_reg_seq(cmd_buffer->cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3);
-	radeon_emit(cmd_buffer->cs, gsvs_itemsize);
-	radeon_emit(cmd_buffer->cs, gsvs_itemsize);
-	radeon_emit(cmd_buffer->cs, gsvs_itemsize);
-
-	radeon_set_context_reg(cmd_buffer->cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);
-
-	radeon_set_context_reg(cmd_buffer->cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out);
-
-	uint32_t gs_vert_itemsize = gs->info.gs.gsvs_vertex_size;
-	radeon_set_context_reg_seq(cmd_buffer->cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4);
-	radeon_emit(cmd_buffer->cs, gs_vert_itemsize >> 2);
-	radeon_emit(cmd_buffer->cs, 0);
-	radeon_emit(cmd_buffer->cs, 0);
-	radeon_emit(cmd_buffer->cs, 0);
-
-	uint32_t gs_num_invocations = gs->info.gs.invocations;
-	radeon_set_context_reg(cmd_buffer->cs, R_028B90_VGT_GS_INSTANCE_CNT,
-			       S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
-			       S_028B90_ENABLE(gs_num_invocations > 0));
-
-	va = ws->buffer_get_va(gs->bo);
-	ws->cs_add_buffer(cmd_buffer->cs, gs->bo, 8);
-	radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
-	radeon_emit(cmd_buffer->cs, va >> 8);
-	radeon_emit(cmd_buffer->cs, va >> 40);
-	radeon_emit(cmd_buffer->cs, gs->rsrc1);
-	radeon_emit(cmd_buffer->cs, gs->rsrc2);
-
-	radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader, &pipeline->gs_copy_shader->info.vs.outinfo);
-
-	struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
-							     AC_UD_GS_VS_RING_STRIDE_ENTRIES);
-	if (loc->sgpr_idx != -1) {
-		uint32_t stride = gs->info.gs.max_gsvs_emit_size;
-		uint32_t num_entries = 64;
-		bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= VI;
-
-		if (is_vi)
-			num_entries *= stride;
-
-		stride = S_008F04_STRIDE(stride);
-		radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B230_SPI_SHADER_USER_DATA_GS_0 + loc->sgpr_idx * 4, 2);
-		radeon_emit(cmd_buffer->cs, stride);
-		radeon_emit(cmd_buffer->cs, num_entries);
-	}
-}
 
 static void
 radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
 			  struct radv_pipeline *pipeline)
 {
 	struct radeon_winsys *ws = cmd_buffer->device->ws;
-	struct radv_shader_variant *ps;
+	struct radv_shader_variant *ps, *vs;
 	uint64_t va;
 	unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
 	struct radv_blend_state *blend = &pipeline->graphics.blend;
+	unsigned ps_offset = 0;
+	unsigned z_order;
 
 	assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);
 	ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
-
+	vs = pipeline->shaders[MESA_SHADER_VERTEX];
 	va = ws->buffer_get_va(ps->bo);
 	ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8);
 
@@ -790,8 +442,20 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
 	radeon_emit(cmd_buffer->cs, ps->rsrc1);
 	radeon_emit(cmd_buffer->cs, ps->rsrc2);
 
+	if (ps->info.fs.early_fragment_test || !ps->info.fs.writes_memory)
+		z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
+	else
+		z_order = V_02880C_LATE_Z;
+
+
 	radeon_set_context_reg(cmd_buffer->cs, R_02880C_DB_SHADER_CONTROL,
-			       pipeline->graphics.db_shader_control);
+			       S_02880C_Z_EXPORT_ENABLE(ps->info.fs.writes_z) |
+			       S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.fs.writes_stencil) |
+			       S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) |
+			       S_02880C_Z_ORDER(z_order) |
+			       S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) |
+			       S_02880C_EXEC_ON_HIER_FAIL(ps->info.fs.writes_memory) |
+			       S_02880C_EXEC_ON_NOOP(ps->info.fs.writes_memory));
 
 	radeon_set_context_reg(cmd_buffer->cs, R_0286CC_SPI_PS_INPUT_ENA,
 			       ps->config.spi_ps_input_ena);
@@ -799,43 +463,51 @@
 	radeon_set_context_reg(cmd_buffer->cs, R_0286D0_SPI_PS_INPUT_ADDR,
 			       ps->config.spi_ps_input_addr);
 
-	if (ps->info.fs.force_persample)
-		spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
-
+	spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
 	radeon_set_context_reg(cmd_buffer->cs, R_0286D8_SPI_PS_IN_CONTROL,
 			       S_0286D8_NUM_INTERP(ps->info.fs.num_interp));
 
 	radeon_set_context_reg(cmd_buffer->cs, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
 
 	radeon_set_context_reg(cmd_buffer->cs, R_028710_SPI_SHADER_Z_FORMAT,
-			       pipeline->graphics.shader_z_format);
+			       ps->info.fs.writes_stencil ? V_028710_SPI_SHADER_32_GR :
+			       ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R :
+			       V_028710_SPI_SHADER_ZERO);
 
 	radeon_set_context_reg(cmd_buffer->cs, R_028714_SPI_SHADER_COL_FORMAT,
 			       blend->spi_shader_col_format);
 	radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
 	radeon_set_context_reg(cmd_buffer->cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
 
-	if (pipeline->graphics.ps_input_cntl_num) {
-		radeon_set_context_reg_seq(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0, pipeline->graphics.ps_input_cntl_num);
-		for (unsigned i = 0; i < pipeline->graphics.ps_input_cntl_num; i++) {
-			radeon_emit(cmd_buffer->cs, pipeline->graphics.ps_input_cntl[i]);
-		}
+	if (ps->info.fs.has_pcoord) {
+		unsigned val;
+		val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
+		radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
+		ps_offset = 1;
 	}
-}
 
-static void polaris_set_vgt_vertex_reuse(struct radv_cmd_buffer *cmd_buffer,
-					 struct radv_pipeline *pipeline)
-{
-	uint32_t vtx_reuse_depth = 30;
-	if (cmd_buffer->device->physical_device->rad_info.family < CHIP_POLARIS10)
-		return;
+	for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
+		unsigned vs_offset, flat_shade;
+		unsigned val;
+
+		if (!(ps->info.fs.input_mask & (1u << i)))
+			continue;
+
+
+		if (!(vs->info.vs.export_mask & (1u << i))) {
+			radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset,
+					       S_028644_OFFSET(0x20));
+			++ps_offset;
+			continue;
+		}
+
+		vs_offset = util_bitcount(vs->info.vs.export_mask & ((1u << i) - 1));
+		flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
 
-	if (pipeline->shaders[MESA_SHADER_TESS_EVAL]) {
-		if (pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.spacing == TESS_SPACING_FRACTIONAL_ODD)
-			vtx_reuse_depth = 14;
+		val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
+		radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
+		++ps_offset;
 	}
-	radeon_set_context_reg(cmd_buffer->cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
-			       vtx_reuse_depth);
 }
 
 static void
@@ -850,23 +522,11 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer,
 	radv_emit_graphics_raster_state(cmd_buffer, pipeline);
 	radv_update_multisample_state(cmd_buffer, pipeline);
 	radv_emit_vertex_shader(cmd_buffer, pipeline);
-	radv_emit_tess_shaders(cmd_buffer, pipeline);
-	radv_emit_geometry_shader(cmd_buffer, pipeline);
 	radv_emit_fragment_shader(cmd_buffer, pipeline);
-	polaris_set_vgt_vertex_reuse(cmd_buffer, pipeline);
-
-	cmd_buffer->scratch_size_needed =
-	        MAX2(cmd_buffer->scratch_size_needed,
-	             pipeline->max_waves * pipeline->scratch_bytes_per_wave);
 
-	radeon_set_context_reg(cmd_buffer->cs, R_0286E8_SPI_TMPRING_SIZE,
-			       S_0286E8_WAVES(pipeline->max_waves) |
-			       S_0286E8_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10));
+	radeon_set_context_reg(cmd_buffer->cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN,
+			       pipeline->graphics.prim_restart_enable);
 
-	if (!cmd_buffer->state.emitted_pipeline ||
-	    cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband !=
-	    pipeline->graphics.can_use_guardband)
-		cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
 	cmd_buffer->state.emitted_pipeline = pipeline;
 }
 
@@ -882,9 +542,7 @@ radv_emit_scissor(struct radv_cmd_buffer *cmd_buffer)
 {
 	uint32_t count = cmd_buffer->state.dynamic.scissor.count;
 	si_write_scissors(cmd_buffer->cs, 0, count,
-			  cmd_buffer->state.dynamic.scissor.scissors,
-			  cmd_buffer->state.dynamic.viewport.viewports,
-			  cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband);
+			  cmd_buffer->state.dynamic.scissor.scissors);
 	radeon_set_context_reg(cmd_buffer->cs, R_028A48_PA_SC_MODE_CNTL_0,
 			       cmd_buffer->state.pipeline->graphics.ms.pa_sc_mode_cntl_0 |
 			       S_028A48_VPORT_SCISSOR_ENABLE(count ? 1 : 0));
 }
 
@@ -894,7 +552,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
 			 int index,
 			 struct radv_color_buffer_info *cb)
 {
-	bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= VI;
+	bool is_vi = cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= VI;
 	radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
 	radeon_emit(cmd_buffer->cs, cb->cb_color_base);
 	radeon_emit(cmd_buffer->cs, cb->cb_color_pitch);
@@ -986,7 +644,7 @@ radv_set_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer,
 	va += image->offset + image->clear_value_offset;
 	unsigned reg_offset = 0, reg_count = 0;
 
-	if (!image->surface.htile_size || !aspects)
+	if (!image->htile.size || !aspects)
 		return;
 
 	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
@@ -1025,7 +683,7 @@ radv_load_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer,
 	uint64_t va = cmd_buffer->device->ws->buffer_get_va(image->bo);
 	va += image->offset + image->clear_value_offset;
 
-	if (!image->surface.htile_size)
+	if (!image->htile.size)
 		return;
 
 	cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, image->bo, 8);
@@ -1160,13 +818,13 @@ void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer)
 	uint32_t db_count_control;
 
 	if(!cmd_buffer->state.active_occlusion_queries) {
-		if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+		if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
 			db_count_control = 0;
 		} else {
 			db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
 		}
 	} else {
-		if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+		if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
 			db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) |
 				S_028004_SAMPLE_RATE(0) | /* TODO: set this to the number of samples of the current framebuffer */
 				S_028004_ZPASS_ENABLE(1) |
@@ -1186,15 +844,6 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
 {
 	struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
 
-	if (G_028810_DX_RASTERIZATION_KILL(cmd_buffer->state.pipeline->graphics.raster.pa_cl_clip_cntl))
-		return;
-
-	if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
-		radv_emit_viewport(cmd_buffer);
-
-	if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
-		radv_emit_scissor(cmd_buffer);
-
 	if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
 		unsigned width = cmd_buffer->state.dynamic.line_width * 8;
 		radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL,
@@ -1246,118 +895,9 @@
 }
 
 static void
-emit_stage_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer,
-				   struct radv_pipeline *pipeline,
-				   int idx,
-				   uint64_t va,
-				   gl_shader_stage stage)
-{
-	struct ac_userdata_info *desc_set_loc = &pipeline->shaders[stage]->info.user_sgprs_locs.descriptor_sets[idx];
-	uint32_t base_reg = shader_stage_to_user_data_0(stage, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
-
-	if (desc_set_loc->sgpr_idx == -1)
-		return;
-
-	assert(!desc_set_loc->indirect);
-	assert(desc_set_loc->num_sgprs == 2);
-	radeon_set_sh_reg_seq(cmd_buffer->cs,
-			      base_reg + desc_set_loc->sgpr_idx * 4, 2);
-	radeon_emit(cmd_buffer->cs, va);
-	radeon_emit(cmd_buffer->cs, va >> 32);
-}
-
-static void
-radv_emit_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer,
-				  VkShaderStageFlags stages,
-				  struct radv_descriptor_set *set,
-				  unsigned idx)
-{
-	if (cmd_buffer->state.pipeline) {
-		if (stages & VK_SHADER_STAGE_FRAGMENT_BIT)
-			emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline,
-							   idx, set->va,
-							   MESA_SHADER_FRAGMENT);
-
-		if (stages & VK_SHADER_STAGE_VERTEX_BIT)
-			emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline,
-							   idx, set->va,
-							   MESA_SHADER_VERTEX);
-
-		if ((stages & VK_SHADER_STAGE_GEOMETRY_BIT) && radv_pipeline_has_gs(cmd_buffer->state.pipeline))
-			emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline,
-							   idx, set->va,
-							   MESA_SHADER_GEOMETRY);
-
-		if ((stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) && radv_pipeline_has_tess(cmd_buffer->state.pipeline))
-			emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline,
-							   idx, set->va,
-							   MESA_SHADER_TESS_CTRL);
-
-		if ((stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) && radv_pipeline_has_tess(cmd_buffer->state.pipeline))
-			emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline,
-							   idx, set->va,
-							   MESA_SHADER_TESS_EVAL);
-	}
-
-	if (cmd_buffer->state.compute_pipeline && (stages & VK_SHADER_STAGE_COMPUTE_BIT))
-		emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.compute_pipeline,
-						   idx, set->va,
-						   MESA_SHADER_COMPUTE);
-}
-
-static void
-radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer)
-{
-	struct radv_descriptor_set *set = &cmd_buffer->push_descriptors.set;
-	uint32_t *ptr = NULL;
-	unsigned bo_offset;
-
-	if (!radv_cmd_buffer_upload_alloc(cmd_buffer, set->size, 32,
-					  &bo_offset,
-					  (void**) &ptr))
-		return;
-
-	set->va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
-	set->va += bo_offset;
-
-	memcpy(ptr, set->mapped_ptr, set->size);
-}
-
-static void
-radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
-		       VkShaderStageFlags stages)
-{
-	unsigned i;
-	if (!cmd_buffer->state.descriptors_dirty)
-		return;
-
-	if (cmd_buffer->state.push_descriptors_dirty)
-		radv_flush_push_descriptors(cmd_buffer);
-
-	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
-	                                                   cmd_buffer->cs,
-	                                                   MAX_SETS * MESA_SHADER_STAGES * 4);
-
-	for (i = 0; i < MAX_SETS; i++) {
-		if (!(cmd_buffer->state.descriptors_dirty & (1 << i)))
-			continue;
-		struct radv_descriptor_set *set = cmd_buffer->state.descriptors[i];
-		if (!set)
-			continue;
-
-		radv_emit_descriptor_set_userdata(cmd_buffer, stages, set, i);
-	}
-	cmd_buffer->state.descriptors_dirty = 0;
-	cmd_buffer->state.push_descriptors_dirty = false;
-	assert(cmd_buffer->cs->cdw <= cdw_max);
-}
-
-static void
 radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
-		     struct radv_pipeline *pipeline,
-		     VkShaderStageFlags stages)
-{
-	struct radv_pipeline_layout *layout = pipeline->layout;
+		     struct radv_pipeline_layout *layout,
+		     VkShaderStageFlags stages) {
 	unsigned offset;
 	void *ptr;
 	uint64_t va;
@@ -1366,10 +906,9 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
 	if (!stages || !layout || (!layout->push_constant_size && !layout->dynamic_offset_count))
 		return;
 
-	if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
-					  16 * layout->dynamic_offset_count,
-					  256, &offset, &ptr))
-		return;
+	radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
+				     16 * layout->dynamic_offset_count,
+				     256, &offset, &ptr);
 
 	memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
 	memcpy((char*)ptr + layout->push_constant_size, cmd_buffer->dynamic_buffers,
@@ -1378,70 +917,40 @@
 	va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
 	va += offset;
 
-	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
-	                                                   cmd_buffer->cs, MESA_SHADER_STAGES * 4);
-
-	if (stages & VK_SHADER_STAGE_VERTEX_BIT)
-		radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_VERTEX,
-					   AC_UD_PUSH_CONSTANTS, va);
-
-	if (stages & VK_SHADER_STAGE_FRAGMENT_BIT)
-		radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_FRAGMENT,
-					   AC_UD_PUSH_CONSTANTS, va);
-
-	if ((stages & VK_SHADER_STAGE_GEOMETRY_BIT) && radv_pipeline_has_gs(pipeline))
-		radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_GEOMETRY,
-					   AC_UD_PUSH_CONSTANTS, va);
-
-	if ((stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) && radv_pipeline_has_tess(pipeline))
-		radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_TESS_CTRL,
-					   AC_UD_PUSH_CONSTANTS, va);
-
-	if ((stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) && radv_pipeline_has_tess(pipeline))
-		radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_TESS_EVAL,
-					   AC_UD_PUSH_CONSTANTS, va);
-
-	if (stages & VK_SHADER_STAGE_COMPUTE_BIT)
-		radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_COMPUTE,
-					   AC_UD_PUSH_CONSTANTS, va);
-
-	cmd_buffer->push_constant_stages &= ~stages;
-	assert(cmd_buffer->cs->cdw <= cdw_max);
-}
-
-static void radv_emit_primitive_reset_state(struct radv_cmd_buffer *cmd_buffer,
-					    bool indexed_draw)
-{
-	int32_t primitive_reset_en = indexed_draw && cmd_buffer->state.pipeline->graphics.prim_restart_enable;
-
-	if (primitive_reset_en != cmd_buffer->state.last_primitive_reset_en) {
-		cmd_buffer->state.last_primitive_reset_en = primitive_reset_en;
-		radeon_set_context_reg(cmd_buffer->cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN,
-				       primitive_reset_en);
+	if (stages & VK_SHADER_STAGE_VERTEX_BIT) {
+		radeon_set_sh_reg_seq(cmd_buffer->cs,
+				      R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_PUSH_CONST_DYN * 4, 2);
+		radeon_emit(cmd_buffer->cs, va);
+		radeon_emit(cmd_buffer->cs, va >> 32);
 	}
 
-	if (primitive_reset_en) {
-		uint32_t primitive_reset_index = cmd_buffer->state.index_type ? 0xffffffffu : 0xffffu;
+	if (stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
+		radeon_set_sh_reg_seq(cmd_buffer->cs,
+				      R_00B030_SPI_SHADER_USER_DATA_PS_0 + AC_USERDATA_PUSH_CONST_DYN * 4, 2);
+		radeon_emit(cmd_buffer->cs, va);
+		radeon_emit(cmd_buffer->cs, va >> 32);
+	}
 
-		if (primitive_reset_index != cmd_buffer->state.last_primitive_reset_index) {
-			cmd_buffer->state.last_primitive_reset_index = primitive_reset_index;
-			radeon_set_context_reg(cmd_buffer->cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
-					       primitive_reset_index);
-		}
+	if (stages & VK_SHADER_STAGE_COMPUTE_BIT) {
+		radeon_set_sh_reg_seq(cmd_buffer->cs,
+				      R_00B900_COMPUTE_USER_DATA_0 + AC_USERDATA_PUSH_CONST_DYN * 4, 2);
+		radeon_emit(cmd_buffer->cs, va);
+		radeon_emit(cmd_buffer->cs, va >> 32);
 	}
+
+	cmd_buffer->push_constant_stages &= ~stages;
 }
 
 static void
-radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer,
-			    bool indexed_draw, bool instanced_draw,
-			    bool indirect_draw,
-			    uint32_t draw_vertex_count)
+radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer)
 {
 	struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
 	struct radv_device *device = cmd_buffer->device;
 	uint32_t ia_multi_vgt_param;
+	uint32_t ls_hs_config = 0;
 
-	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
-							   cmd_buffer->cs, 4096);
+	unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
					      4096);
 
 	if ((cmd_buffer->state.vertex_descriptors_dirty || cmd_buffer->state.vb_dirty) &&
 	    cmd_buffer->state.pipeline->num_vertex_attribs) {
@@ -1469,7 +978,7 @@
 		va += offset + buffer->offset;
 		desc[0] = va;
 		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
-		if (cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride)
+		if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class <= CIK && stride)
 			desc[2] = (buffer->size - offset - cmd_buffer->state.pipeline->va_format_size[i]) / stride + 1;
 		else
 			desc[2] = buffer->size - offset;
@@ -1478,9 +987,11 @@
 	va = device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
 	va += vb_offset;
 
+	radeon_set_sh_reg_seq(cmd_buffer->cs,
+			      R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_VERTEX_BUFFERS * 4, 2);
+	radeon_emit(cmd_buffer->cs, va);
+	radeon_emit(cmd_buffer->cs, va >> 32);
 
-	radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_VERTEX,
-				   AC_UD_VS_VERTEX_BUFFERS, va);
 	}
 	cmd_buffer->state.vertex_descriptors_dirty = false;
@@ -1491,32 +1002,31 @@
 	if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_RENDER_TARGETS)
 		radv_emit_framebuffer_state(cmd_buffer);
 
-	ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, draw_vertex_count);
-	if (cmd_buffer->state.last_ia_multi_vgt_param != ia_multi_vgt_param) {
-		if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
-			radeon_set_context_reg_idx(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
-		else
-			radeon_set_context_reg(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
-		cmd_buffer->state.last_ia_multi_vgt_param = ia_multi_vgt_param;
-	}
+	if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
+		radv_emit_viewport(cmd_buffer);
+
+	if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR))
+		radv_emit_scissor(cmd_buffer);
 
 	if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) {
-		radeon_set_context_reg(cmd_buffer->cs, R_028B54_VGT_SHADER_STAGES_EN, pipeline->graphics.vgt_shader_stages_en);
+		radeon_set_context_reg(cmd_buffer->cs, R_028B54_VGT_SHADER_STAGES_EN, 0);
+		ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer);
 
-		if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+		if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
+			radeon_set_context_reg_idx(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
+			radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
 			radeon_set_uconfig_reg_idx(cmd_buffer->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, cmd_buffer->state.pipeline->graphics.prim);
 		} else {
 			radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, cmd_buffer->state.pipeline->graphics.prim);
+			radeon_set_context_reg(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
+			radeon_set_context_reg(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
 		}
 		radeon_set_context_reg(cmd_buffer->cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, cmd_buffer->state.pipeline->graphics.gs_out);
 	}
 
 	radv_cmd_buffer_flush_dynamic_state(cmd_buffer);
 
-	radv_emit_primitive_reset_state(cmd_buffer, indexed_draw);
-
-	radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
-	radv_flush_constants(cmd_buffer, cmd_buffer->state.pipeline,
+	radv_flush_constants(cmd_buffer, cmd_buffer->state.pipeline->layout,
 			     VK_SHADER_STAGE_ALL_GRAPHICS);
 
 	assert(cmd_buffer->cs->cdw <= cdw_max);
@@ -1554,86 +1064,11 @@ static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer,
 	}
 }
 
-static enum radv_cmd_flush_bits
-radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
-		      VkAccessFlags src_flags)
-{
-	enum radv_cmd_flush_bits flush_bits = 0;
-	uint32_t b;
-	for_each_bit(b, src_flags) {
-		switch ((VkAccessFlagBits)(1 << b)) {
-		case VK_ACCESS_SHADER_WRITE_BIT:
-			flush_bits |= RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
-			break;
-		case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
-			flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
-				      RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
-			break;
-		case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
-			flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
-				      RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
-			break;
-		case VK_ACCESS_TRANSFER_WRITE_BIT:
-			flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
-				      RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
-				      RADV_CMD_FLAG_FLUSH_AND_INV_DB |
-				      RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
-				      RADV_CMD_FLAG_INV_GLOBAL_L2;
-			break;
-		default:
-			break;
-		}
-	}
-	return flush_bits;
-}
-
-static enum radv_cmd_flush_bits
-radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
-		      VkAccessFlags dst_flags,
-		      struct radv_image *image)
-{
-	enum radv_cmd_flush_bits flush_bits = 0;
-	uint32_t b;
-	for_each_bit(b, dst_flags) {
-		switch ((VkAccessFlagBits)(1 << b)) {
-		case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
-		case VK_ACCESS_INDEX_READ_BIT:
-		case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
-			break;
-		case VK_ACCESS_UNIFORM_READ_BIT:
-			flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
-			break;
-		case VK_ACCESS_SHADER_READ_BIT:
-		case VK_ACCESS_TRANSFER_READ_BIT:
-		case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
-			flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 |
-				      RADV_CMD_FLAG_INV_GLOBAL_L2;
-			break;
-		case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
-			/* TODO: change to image && when the image gets passed
-			 * through from the subpass. */
-			if (!image || (image->usage & VK_IMAGE_USAGE_STORAGE_BIT))
-				flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
-					      RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
-			break;
-		case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT:
-			if (!image || (image->usage & VK_IMAGE_USAGE_STORAGE_BIT))
-				flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
-					      RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
-			break;
-		default:
-			break;
-		}
-	}
-	return flush_bits;
-}
-
 static void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
 				 const struct radv_subpass_barrier *barrier)
 {
-	cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_access_mask);
 	radv_stage_flush(cmd_buffer, barrier->src_stage_mask);
-	cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask,
-							      NULL);
+
+	/* TODO: actual cache flushes */
 }
 
 static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer,
@@ -1651,7 +1086,7 @@
 	radv_handle_image_transition(cmd_buffer,
 				     view->image,
 				     cmd_buffer->state.attachments[idx].current_layout,
-				     att.layout, 0, 0, &range,
+				     att.layout, range,
 				     cmd_buffer->state.attachments[idx].pending_clear_aspects);
 
 	cmd_buffer->state.attachments[idx].current_layout = att.layout;
@@ -1751,27 +1186,9 @@ VkResult radv_AllocateCommandBuffers(
 	VkResult result = VK_SUCCESS;
 	uint32_t i;
 
-	memset(pCommandBuffers, 0,
-	       sizeof(*pCommandBuffers)*pAllocateInfo->commandBufferCount);
-
 	for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
-
-		if (!list_empty(&pool->free_cmd_buffers)) {
-			struct radv_cmd_buffer *cmd_buffer = list_first_entry(&pool->free_cmd_buffers, struct radv_cmd_buffer, pool_link);
-
-			list_del(&cmd_buffer->pool_link);
-			list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
-
-			radv_reset_cmd_buffer(cmd_buffer);
-			cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
-			cmd_buffer->level = pAllocateInfo->level;
-
-			pCommandBuffers[i] = radv_cmd_buffer_to_handle(cmd_buffer);
-			result = VK_SUCCESS;
-		} else {
-			result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level,
-							&pCommandBuffers[i]);
-		}
+		result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level,
+						&pCommandBuffers[i]);
 		if (result != VK_SUCCESS)
 			break;
 	}
@@ -1783,6 +1200,24 @@
 	return result;
 }
 
+static void
+radv_cmd_buffer_destroy(struct radv_cmd_buffer *cmd_buffer)
+{
+	list_del(&cmd_buffer->pool_link);
+
+	list_for_each_entry_safe(struct radv_cmd_buffer_upload, up,
+				 &cmd_buffer->upload.list, list) {
+		cmd_buffer->device->ws->buffer_destroy(up->upload_bo);
+		list_del(&up->list);
+		free(up);
+	}
+
+	if (cmd_buffer->upload.upload_bo)
+		cmd_buffer->device->ws->buffer_destroy(cmd_buffer->upload.upload_bo);
+	cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs);
+	vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
+}
+
 void radv_FreeCommandBuffers(
 	VkDevice device,
 	VkCommandPool commandPool,
@@ -1792,15 +1227,29 @@ void radv_FreeCommandBuffers(
 	for (uint32_t i = 0; i < commandBufferCount; i++) {
 		RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
 
-		if (cmd_buffer) {
-			if (cmd_buffer->pool) {
-				list_del(&cmd_buffer->pool_link);
-				list_addtail(&cmd_buffer->pool_link, &cmd_buffer->pool->free_cmd_buffers);
-			} else
-				radv_cmd_buffer_destroy(cmd_buffer);
+		if (cmd_buffer)
+			radv_cmd_buffer_destroy(cmd_buffer);
+	}
+}
 
-		}
+static void radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
+{
+
+	cmd_buffer->device->ws->cs_reset(cmd_buffer->cs);
+
+	list_for_each_entry_safe(struct radv_cmd_buffer_upload, up,
+				 &cmd_buffer->upload.list, list) {
+		cmd_buffer->device->ws->buffer_destroy(up->upload_bo);
+		list_del(&up->list);
+		free(up);
 	}
+
+	if (cmd_buffer->upload.upload_bo)
+		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
+						      cmd_buffer->upload.upload_bo, 8);
+	cmd_buffer->upload.offset = 0;
+
+	cmd_buffer->record_fail = false;
 }
 
 VkResult radv_ResetCommandBuffer(
@@ -1812,20 +1261,6 @@
 	return VK_SUCCESS;
 }
 
-static void emit_gfx_buffer_state(struct radv_cmd_buffer *cmd_buffer)
-{
-	struct radv_device *device = cmd_buffer->device;
-	if (device->gfx_init) {
-		uint64_t va = device->ws->buffer_get_va(device->gfx_init);
-		device->ws->cs_add_buffer(cmd_buffer->cs, device->gfx_init, 8);
-		radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
-		radeon_emit(cmd_buffer->cs, va);
-		radeon_emit(cmd_buffer->cs, (va >> 32) & 0xffff);
-		radeon_emit(cmd_buffer->cs, device->gfx_init_size_dw & 0xffff);
-	} else
-		si_init_config(cmd_buffer);
-}
-
 VkResult radv_BeginCommandBuffer(
 	VkCommandBuffer commandBuffer,
 	const VkCommandBufferBeginInfo *pBeginInfo)
@@ -1834,22 +1269,20 @@ VkResult radv_BeginCommandBuffer(
 	radv_reset_cmd_buffer(cmd_buffer);
 
 	memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
-	cmd_buffer->state.last_primitive_reset_en = -1;
 
 	/* setup initial configuration into command buffer */
 	if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
-		switch (cmd_buffer->queue_family_index) {
-		case RADV_QUEUE_GENERAL:
-			emit_gfx_buffer_state(cmd_buffer);
-			radv_set_db_count_control(cmd_buffer);
-			break;
-		case RADV_QUEUE_COMPUTE:
-			si_init_compute(cmd_buffer);
-			break;
-		case RADV_QUEUE_TRANSFER:
-		default:
-			break;
-		}
+		/* Flush read caches at the beginning of CS not flushed by the kernel. */
+		cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_ICACHE |
+			RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+			RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+			RADV_CMD_FLAG_INV_VMEM_L1 |
+			RADV_CMD_FLAG_INV_SMEM_L1 |
+			RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER |
+			RADV_CMD_FLAG_INV_GLOBAL_L2;
+		si_init_config(&cmd_buffer->device->instance->physicalDevice, cmd_buffer);
+		radv_set_db_count_control(cmd_buffer);
+		si_emit_cache_flush(cmd_buffer);
 	}
 
 	if (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
@@ -1863,7 +1296,6 @@ VkResult radv_BeginCommandBuffer(
 		radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false);
 	}
 
-	radv_cmd_buffer_trace_emit(cmd_buffer);
 	return VK_SUCCESS;
 }
 
@@ -1910,10 +1342,8 @@ void radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
 {
 	struct radeon_winsys *ws = cmd_buffer->device->ws;
 
-	assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
-
 	cmd_buffer->state.descriptors[idx] = set;
-	cmd_buffer->state.descriptors_dirty |= (1 << idx);
+
 	if (!set)
 		return;
 
@@ -1921,6 +1351,21 @@
 		if (set->descriptors[j])
 			ws->cs_add_buffer(cmd_buffer->cs, set->descriptors[j], 7);
 
+	radeon_set_sh_reg_seq(cmd_buffer->cs,
+			      R_00B030_SPI_SHADER_USER_DATA_PS_0 + 8 * idx, 2);
+	radeon_emit(cmd_buffer->cs, set->va);
+	radeon_emit(cmd_buffer->cs, set->va >> 32);
+
+	radeon_set_sh_reg_seq(cmd_buffer->cs,
+			      R_00B130_SPI_SHADER_USER_DATA_VS_0 + 8 * idx, 2);
+	radeon_emit(cmd_buffer->cs, set->va);
+	radeon_emit(cmd_buffer->cs, set->va >> 32);
+
+	radeon_set_sh_reg_seq(cmd_buffer->cs,
+			      R_00B900_COMPUTE_USER_DATA_0 + 8 * idx, 2);
+	radeon_emit(cmd_buffer->cs, set->va);
+	radeon_emit(cmd_buffer->cs, set->va >> 32);
+
 	if(set->bo)
 		ws->cs_add_buffer(cmd_buffer->cs, set->bo, 8);
 }
@@ -1939,13 +1384,16 @@ void radv_CmdBindDescriptorSets(
 	RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
 	unsigned dyn_idx = 0;
 
+	unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
+					      MAX_SETS * 4 * 6);
+
 	for (unsigned i = 0; i < descriptorSetCount; ++i) {
 		unsigned idx = i + firstSet;
 		RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);
 		radv_bind_descriptor_set(cmd_buffer, set, idx);
 
 		for(unsigned j = 0; j < set->layout->dynamic_offset_count; ++j, ++dyn_idx) {
-			unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start;
+			unsigned idx = j + layout->set[i].dynamic_offset_start;
 			uint32_t *dst = cmd_buffer->dynamic_buffers + idx * 4;
 			assert(dyn_idx < dynamicOffsetCount);
 
@@ -1964,116 +1412,8 @@ void radv_CmdBindDescriptorSets(
 				set->layout->dynamic_shader_stages;
 		}
 	}
-}
-
-static bool radv_init_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
-					  struct radv_descriptor_set *set,
-					  struct radv_descriptor_set_layout *layout)
-{
-	set->size = layout->size;
-	set->layout = layout;
-
-	if (cmd_buffer->push_descriptors.capacity < set->size) {
-		size_t new_size = MAX2(set->size, 1024);
-		new_size = MAX2(new_size, 2 * cmd_buffer->push_descriptors.capacity);
-		new_size = MIN2(new_size, 96 * MAX_PUSH_DESCRIPTORS);
-
-		free(set->mapped_ptr);
-		set->mapped_ptr = malloc(new_size);
-
-		if (!set->mapped_ptr) {
-			cmd_buffer->push_descriptors.capacity = 0;
-			cmd_buffer->record_fail = true;
-			return false;
-		}
-
-		cmd_buffer->push_descriptors.capacity = new_size;
-	}
-
-	return true;
-}
-
-void radv_meta_push_descriptor_set(
-	struct radv_cmd_buffer*              cmd_buffer,
-	VkPipelineBindPoint                  pipelineBindPoint,
-	VkPipelineLayout                     _layout,
-	uint32_t                             set,
-	uint32_t                             descriptorWriteCount,
-	const VkWriteDescriptorSet*          pDescriptorWrites)
-{
-	RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
-	struct radv_descriptor_set *push_set = &cmd_buffer->meta_push_descriptors;
-	unsigned bo_offset;
-
-	assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
-
-	push_set->size = layout->set[set].layout->size;
-	push_set->layout = layout->set[set].layout;
-
-	if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->size, 32,
-					  &bo_offset,
-					  (void**) &push_set->mapped_ptr))
-		return;
-
-	push_set->va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
-	push_set->va += bo_offset;
-
-	radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
-				    radv_descriptor_set_to_handle(push_set),
-				    descriptorWriteCount, pDescriptorWrites, 0, NULL);
-
-	cmd_buffer->state.descriptors[set] = push_set;
-	cmd_buffer->state.descriptors_dirty |= (1 << set);
-}
-
-void radv_CmdPushDescriptorSetKHR(
-	VkCommandBuffer                             commandBuffer,
-	VkPipelineBindPoint                         pipelineBindPoint,
-	VkPipelineLayout                            _layout,
-	uint32_t                                    set,
-	uint32_t                                    descriptorWriteCount,
-	const VkWriteDescriptorSet*                 pDescriptorWrites)
-{
-	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-	RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
-	struct radv_descriptor_set *push_set = &cmd_buffer->push_descriptors.set;
-
-	assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
-
-	if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout))
-		return;
-
-	radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
-				    radv_descriptor_set_to_handle(push_set),
-				    descriptorWriteCount, pDescriptorWrites, 0, NULL);
-
-	cmd_buffer->state.descriptors[set] = push_set;
-	cmd_buffer->state.descriptors_dirty |= (1 << set);
-	cmd_buffer->state.push_descriptors_dirty = true;
-}
-
-void radv_CmdPushDescriptorSetWithTemplateKHR(
-	VkCommandBuffer                             commandBuffer,
-	VkDescriptorUpdateTemplateKHR               descriptorUpdateTemplate,
-	VkPipelineLayout                            _layout,
-	uint32_t                                    set,
-	const void*                                 pData)
-{
-	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-	RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
-	struct radv_descriptor_set *push_set = &cmd_buffer->push_descriptors.set;
-
-	assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
-
-	if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout))
-		return;
-
-	radv_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set,
-						 descriptorUpdateTemplate, pData);
-
-	cmd_buffer->state.descriptors[set] = push_set;
-	cmd_buffer->state.descriptors_dirty |= (1 << set);
-	cmd_buffer->state.push_descriptors_dirty = true;
+	assert(cmd_buffer->cs->cdw <= cdw_max);
 }
 
 void radv_CmdPushConstants(VkCommandBuffer commandBuffer,
@@ -2093,9 +1433,7 @@ VkResult radv_EndCommandBuffer(
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 
-	if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER)
-		si_emit_cache_flush(cmd_buffer);
-
+	si_emit_cache_flush(cmd_buffer);
 	if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs) ||
 	    cmd_buffer->record_fail)
 		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
@@ -2120,8 +1458,7 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
 
 	ws->cs_add_buffer(cmd_buffer->cs, compute_shader->bo, 8);
 
-	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
-							   cmd_buffer->cs, 16);
+	unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
+					      16);
 
 	radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B830_COMPUTE_PGM_LO, 2);
 	radeon_emit(cmd_buffer->cs, va >> 8);
@@ -2131,15 +1468,9 @@
 	radeon_emit(cmd_buffer->cs, compute_shader->rsrc1);
 	radeon_emit(cmd_buffer->cs, compute_shader->rsrc2);
 
-
-	cmd_buffer->compute_scratch_size_needed =
-	        MAX2(cmd_buffer->compute_scratch_size_needed,
-	             pipeline->max_waves * pipeline->scratch_bytes_per_wave);
-
+	/* change these once we have scratch support */
 	radeon_set_sh_reg(cmd_buffer->cs, R_00B860_COMPUTE_TMPRING_SIZE,
-			  S_00B860_WAVES(pipeline->max_waves) |
-			  S_00B860_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10));
+			  S_00B860_WAVES(32) | S_00B860_WAVESIZE(0));
 
 	radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
 	radeon_emit(cmd_buffer->cs,
@@ -2152,13 +1483,6 @@
 	assert(cmd_buffer->cs->cdw <= cdw_max);
 }
 
-static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer)
-{
-	for (unsigned i = 0; i < MAX_SETS; i++) {
-		if (cmd_buffer->state.descriptors[i])
-			cmd_buffer->state.descriptors_dirty |= (1u << i);
-	}
-}
 
 void radv_CmdBindPipeline(
 	VkCommandBuffer                             commandBuffer,
@@ -2168,8 +1492,6 @@
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 	RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
 
-	radv_mark_descriptor_sets_dirty(cmd_buffer);
-
 	switch (pipelineBindPoint) {
 	case VK_PIPELINE_BIND_POINT_COMPUTE:
 		cmd_buffer->state.compute_pipeline = pipeline;
@@ -2177,9 +1499,6 @@
 		break;
 	case VK_PIPELINE_BIND_POINT_GRAPHICS:
 		cmd_buffer->state.pipeline = pipeline;
-		if (!pipeline)
-			break;
-
 		cmd_buffer->state.vertex_descriptors_dirty = true;
 		cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
 		cmd_buffer->push_constant_stages |= pipeline->active_stages;
@@ -2189,23 +1508,6 @@
 		radv_dynamic_state_copy(&cmd_buffer->state.dynamic,
					&pipeline->dynamic_state,
					pipeline->dynamic_state_mask);
-
-		if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed)
-			cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size;
-		if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed)
-			cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size;
-
-		if (radv_pipeline_has_tess(pipeline))
-			cmd_buffer->tess_rings_needed = true;
-
-		if (radv_pipeline_has_gs(pipeline)) {
-			struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
-									     AC_UD_SCRATCH_RING_OFFSETS);
-			if (cmd_buffer->ring_offsets_idx == -1)
-				cmd_buffer->ring_offsets_idx = loc->sgpr_idx;
-			else if (loc->sgpr_idx != -1)
-				assert(loc->sgpr_idx == cmd_buffer->ring_offsets_idx);
-		}
 		break;
 	default:
 		assert(!"invalid bind point");
@@ -2342,6 +1644,7 @@ void radv_CmdSetStencilReference(
 	cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
 }
 
+
 void radv_CmdExecuteCommands(
 	VkCommandBuffer                             commandBuffer,
	uint32_t                                    commandBufferCount,
@@ -2349,44 +1652,17 @@
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer);
 
-	/* Emit pending flushes on primary prior to executing secondary */
-	si_emit_cache_flush(primary);
-
 	for (uint32_t i = 0; i < commandBufferCount; i++) {
 		RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);
 
-		primary->scratch_size_needed = MAX2(primary->scratch_size_needed,
-		                                    secondary->scratch_size_needed);
-		primary->compute_scratch_size_needed = MAX2(primary->compute_scratch_size_needed,
-		                                            secondary->compute_scratch_size_needed);
-
-		if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed)
-			primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
-		if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed)
-			primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
-		if (secondary->tess_rings_needed)
-			primary->tess_rings_needed = true;
-		if (secondary->sample_positions_needed)
-			primary->sample_positions_needed = true;
-
-		if (secondary->ring_offsets_idx != -1) {
-			if (primary->ring_offsets_idx == -1)
-				primary->ring_offsets_idx = secondary->ring_offsets_idx;
-			else
-				assert(secondary->ring_offsets_idx == primary->ring_offsets_idx);
-		}
-
 		primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs);
 	}
 
 	/* if we execute secondary we need to re-emit out pipelines */
 	if (commandBufferCount) {
 		primary->state.emitted_pipeline = NULL;
-		primary->state.emitted_compute_pipeline = NULL;
 		primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
 		primary->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ALL;
-		primary->state.last_primitive_reset_en = -1;
-		primary->state.last_primitive_reset_index = 0;
-		radv_mark_descriptor_sets_dirty(primary);
 	}
 }
 
@@ -2410,9 +1686,6 @@ VkResult radv_CreateCommandPool(
 	pool->alloc = device->alloc;
 
 	list_inithead(&pool->cmd_buffers);
-	list_inithead(&pool->free_cmd_buffers);
-
-	pool->queue_family_index = pCreateInfo->queueFamilyIndex;
 
 	*pCmdPool = radv_cmd_pool_to_handle(pool);
 
@@ -2436,11 +1709,6 @@ void radv_DestroyCommandPool(
 		radv_cmd_buffer_destroy(cmd_buffer);
 	}
 
-	list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer,
-				 &pool->free_cmd_buffers, pool_link) {
-		radv_cmd_buffer_destroy(cmd_buffer);
-	}
-
 	vk_free2(&device->alloc, pAllocator, pool);
 }
 
@@ -2459,22 +1727,6 @@ VkResult radv_ResetCommandPool(
 	return VK_SUCCESS;
 }
 
-void radv_TrimCommandPoolKHR(
-	VkDevice                                    device,
-	VkCommandPool                               commandPool,
-	VkCommandPoolTrimFlagsKHR                   flags)
-{
-	RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
-
-	if (!pool)
-		return;
-
-	list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer,
-				 &pool->free_cmd_buffers, pool_link) {
-		radv_cmd_buffer_destroy(cmd_buffer);
-	}
-}
-
 void radv_CmdBeginRenderPass(
 	VkCommandBuffer                             commandBuffer,
	const VkRenderPassBeginInfo*                pRenderPassBegin,
@@ -2484,14 +1736,16 @@
 	RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBegin->renderPass);
 	RADV_FROM_HANDLE(radv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
 
-	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
-							   cmd_buffer->cs, 2048);
+	unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
+					      2048);
 
 	cmd_buffer->state.framebuffer = framebuffer;
 	cmd_buffer->state.pass = pass;
 	cmd_buffer->state.render_area = pRenderPassBegin->renderArea;
 	radv_cmd_state_setup_attachments(cmd_buffer, pass, pRenderPassBegin);
 
+	si_emit_cache_flush(cmd_buffer);
+
 	radv_cmd_buffer_set_subpass(cmd_buffer, pass->subpasses, true);
 
 	assert(cmd_buffer->cs->cdw <= cdw_max);
@@ -2504,6 +1758,7 @@ void radv_CmdNextSubpass(
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 
+	si_emit_cache_flush(cmd_buffer);
 	radv_cmd_buffer_resolve_subpass(cmd_buffer);
 
 	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
@@ -2521,21 +1776,13 @@ void radv_CmdDraw(
	uint32_t firstInstance)
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	radv_cmd_buffer_flush_state(cmd_buffer);
 
-	radv_cmd_buffer_flush_state(cmd_buffer, false,
(instanceCount > 1), false, vertexCount); + unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9); - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10); - - struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX, - AC_UD_VS_BASE_VERTEX_START_INSTANCE); - if (loc->sgpr_idx != -1) { - uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(cmd_buffer->state.pipeline), - radv_pipeline_has_tess(cmd_buffer->state.pipeline)); - radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 3); - radeon_emit(cmd_buffer->cs, firstVertex); - radeon_emit(cmd_buffer->cs, firstInstance); - radeon_emit(cmd_buffer->cs, 0); - } + radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_BASE_VERTEX * 4, 2); + radeon_emit(cmd_buffer->cs, firstVertex); + radeon_emit(cmd_buffer->cs, firstInstance); radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); radeon_emit(cmd_buffer->cs, instanceCount); @@ -2545,8 +1792,18 @@ void radv_CmdDraw( S_0287F0_USE_OPAQUE(0)); assert(cmd_buffer->cs->cdw <= cdw_max); +} - radv_cmd_buffer_trace_emit(cmd_buffer); +static void radv_emit_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer) +{ + uint32_t primitive_reset_index = cmd_buffer->state.last_primitive_reset_index ? 0xffffffffu : 0xffffu; + + if (cmd_buffer->state.pipeline->graphics.prim_restart_enable && + primitive_reset_index != cmd_buffer->state.last_primitive_reset_index) { + cmd_buffer->state.last_primitive_reset_index = primitive_reset_index; + radeon_set_context_reg(cmd_buffer->cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, + primitive_reset_index); + } } void radv_CmdDrawIndexed( @@ -2562,23 +1819,17 @@ void radv_CmdDrawIndexed( uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size; uint64_t index_va; - radv_cmd_buffer_flush_state(cmd_buffer, true, (instanceCount > 1), false, indexCount); + radv_cmd_buffer_flush_state(cmd_buffer); + radv_emit_primitive_reset_index(cmd_buffer); - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15); + unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 14); radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); radeon_emit(cmd_buffer->cs, cmd_buffer->state.index_type); - struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX, - AC_UD_VS_BASE_VERTEX_START_INSTANCE); - if (loc->sgpr_idx != -1) { - uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(cmd_buffer->state.pipeline), - radv_pipeline_has_tess(cmd_buffer->state.pipeline)); - radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 3); - radeon_emit(cmd_buffer->cs, vertexOffset); - radeon_emit(cmd_buffer->cs, firstInstance); - radeon_emit(cmd_buffer->cs, 0); - } + radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_BASE_VERTEX * 4, 2); + radeon_emit(cmd_buffer->cs, vertexOffset); + radeon_emit(cmd_buffer->cs, firstInstance); radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); radeon_emit(cmd_buffer->cs, instanceCount); @@ -2592,43 +1843,28 @@ void radv_CmdDrawIndexed( radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA); assert(cmd_buffer->cs->cdw <= cdw_max); - radv_cmd_buffer_trace_emit(cmd_buffer); } static void radv_emit_indirect_draw(struct radv_cmd_buffer 
*cmd_buffer, VkBuffer _buffer, VkDeviceSize offset, - VkBuffer _count_buffer, - VkDeviceSize count_offset, uint32_t draw_count, uint32_t stride, bool indexed) { RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); - RADV_FROM_HANDLE(radv_buffer, count_buffer, _count_buffer); struct radeon_winsys_cs *cs = cmd_buffer->cs; unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX; uint64_t indirect_va = cmd_buffer->device->ws->buffer_get_va(buffer->bo); indirect_va += offset + buffer->offset; - uint64_t count_va = 0; - - if (count_buffer) { - count_va = cmd_buffer->device->ws->buffer_get_va(count_buffer->bo); - count_va += count_offset + count_buffer->offset; - } if (!draw_count) return; cmd_buffer->device->ws->cs_add_buffer(cs, buffer->bo, 8); - struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX, - AC_UD_VS_BASE_VERTEX_START_INSTANCE); - uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(cmd_buffer->state.pipeline), - radv_pipeline_has_tess(cmd_buffer->state.pipeline)); - assert(loc->sgpr_idx != -1); radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0)); radeon_emit(cs, 1); radeon_emit(cs, indirect_va); @@ -2638,60 +1874,51 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer, PKT3_DRAW_INDIRECT_MULTI, 8, false)); radeon_emit(cs, 0); - radeon_emit(cs, ((base_reg + loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2); - radeon_emit(cs, ((base_reg + (loc->sgpr_idx + 1) * 4) - SI_SH_REG_OFFSET) >> 2); - radeon_emit(cs, (((base_reg + (loc->sgpr_idx + 2) * 4) - SI_SH_REG_OFFSET) >> 2) | - S_2C3_DRAW_INDEX_ENABLE(1) | - S_2C3_COUNT_INDIRECT_ENABLE(!!count_va)); + radeon_emit(cs, ((R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_BASE_VERTEX * 4) - SI_SH_REG_OFFSET) >> 2); + radeon_emit(cs, ((R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_START_INSTANCE * 4) - SI_SH_REG_OFFSET) >> 2); + radeon_emit(cs, 0); /* draw_index */ radeon_emit(cs, draw_count); /* count */ - radeon_emit(cs, count_va); /* count_addr */ - radeon_emit(cs, count_va >> 32); + radeon_emit(cs, 0); /* count_addr -- disabled */ + radeon_emit(cs, 0); radeon_emit(cs, stride); /* stride */ radeon_emit(cs, di_src_sel); - radv_cmd_buffer_trace_emit(cmd_buffer); } -static void -radv_cmd_draw_indirect_count(VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - VkBuffer countBuffer, - VkDeviceSize countBufferOffset, - uint32_t maxDrawCount, - uint32_t stride) +void radv_CmdDrawIndirect( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - radv_cmd_buffer_flush_state(cmd_buffer, false, false, true, 0); + radv_cmd_buffer_flush_state(cmd_buffer); - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, - cmd_buffer->cs, 14); + unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 14); - radv_emit_indirect_draw(cmd_buffer, buffer, offset, - countBuffer, countBufferOffset, maxDrawCount, stride, false); + radv_emit_indirect_draw(cmd_buffer, _buffer, offset, drawCount, stride, false); assert(cmd_buffer->cs->cdw <= cdw_max); } -static void -radv_cmd_draw_indexed_indirect_count( +void radv_CmdDrawIndexedIndirect( VkCommandBuffer commandBuffer, - VkBuffer buffer, + VkBuffer _buffer, VkDeviceSize offset, - VkBuffer countBuffer, - VkDeviceSize countBufferOffset, - uint32_t maxDrawCount, + uint32_t drawCount, uint32_t stride) { 
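/* Sketch (not from the imported source): a hedged outline of the packet
 * sequence this entry point ends up building, using only names that appear
 * in this file; the dword budget is checked by radeon_check_space() below.
 *
 *   PKT3_INDEX_TYPE           selects 16- vs 32-bit indices
 *   PKT3_INDEX_BUFFER_SIZE    indices addressable from index_va
 *   PKT3_SET_BASE             VA of the indirect argument buffer
 *   PKT3_DRAW_INDIRECT_MULTI  drawCount draws, stride bytes apart
 *
 * The last two are emitted by radv_emit_indirect_draw(); with the count
 * buffer support removed in this version, count_addr is simply emitted as 0.
 */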
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); int index_size = cmd_buffer->state.index_type ? 4 : 2; uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size; uint64_t index_va; - radv_cmd_buffer_flush_state(cmd_buffer, true, false, true, 0); + radv_cmd_buffer_flush_state(cmd_buffer); + radv_emit_primitive_reset_index(cmd_buffer); index_va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->state.index_buffer->bo); index_va += cmd_buffer->state.index_buffer->offset + cmd_buffer->state.index_offset; - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 21); + unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 21); radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); radeon_emit(cmd_buffer->cs, cmd_buffer->state.index_type); @@ -2703,72 +1930,11 @@ radv_cmd_draw_indexed_indirect_count( radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0)); radeon_emit(cmd_buffer->cs, index_max_size); - radv_emit_indirect_draw(cmd_buffer, buffer, offset, - countBuffer, countBufferOffset, maxDrawCount, stride, true); + radv_emit_indirect_draw(cmd_buffer, _buffer, offset, drawCount, stride, true); assert(cmd_buffer->cs->cdw <= cdw_max); } -void radv_CmdDrawIndirect( - VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) -{ - radv_cmd_draw_indirect_count(commandBuffer, buffer, offset, - VK_NULL_HANDLE, 0, drawCount, stride); -} - -void radv_CmdDrawIndexedIndirect( - VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) -{ - radv_cmd_draw_indexed_indirect_count(commandBuffer, buffer, offset, - VK_NULL_HANDLE, 0, drawCount, stride); -} - -void radv_CmdDrawIndirectCountAMD( - VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - VkBuffer countBuffer, - VkDeviceSize countBufferOffset, - uint32_t maxDrawCount, - uint32_t stride) -{ - radv_cmd_draw_indirect_count(commandBuffer, buffer, offset, - countBuffer, countBufferOffset, - maxDrawCount, stride); -} - -void radv_CmdDrawIndexedIndirectCountAMD( - VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - VkBuffer countBuffer, - VkDeviceSize countBufferOffset, - uint32_t maxDrawCount, - uint32_t stride) -{ - radv_cmd_draw_indexed_indirect_count(commandBuffer, buffer, offset, - countBuffer, countBufferOffset, - maxDrawCount, stride); -} - -static void -radv_flush_compute_state(struct radv_cmd_buffer *cmd_buffer) -{ - radv_emit_compute_pipeline(cmd_buffer); - radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT); - radv_flush_constants(cmd_buffer, cmd_buffer->state.compute_pipeline, - VK_SHADER_STAGE_COMPUTE_BIT); - si_emit_cache_flush(cmd_buffer); -} - void radv_CmdDispatch( VkCommandBuffer commandBuffer, uint32_t x, @@ -2777,20 +1943,16 @@ void radv_CmdDispatch( { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - radv_flush_compute_state(cmd_buffer); - - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10); + radv_emit_compute_pipeline(cmd_buffer); + radv_flush_constants(cmd_buffer, cmd_buffer->state.compute_pipeline->layout, + VK_SHADER_STAGE_COMPUTE_BIT); + si_emit_cache_flush(cmd_buffer); + unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10); - struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline, - MESA_SHADER_COMPUTE, 
AC_UD_CS_GRID_SIZE); - if (loc->sgpr_idx != -1) { - assert(!loc->indirect); - assert(loc->num_sgprs == 3); - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3); - radeon_emit(cmd_buffer->cs, x); - radeon_emit(cmd_buffer->cs, y); - radeon_emit(cmd_buffer->cs, z); - } + radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + AC_USERDATA_CS_GRID_SIZE * 4, 3); + radeon_emit(cmd_buffer->cs, x); + radeon_emit(cmd_buffer->cs, y); + radeon_emit(cmd_buffer->cs, z); radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) | PKT3_SHADER_TYPE_S(1)); @@ -2800,7 +1962,6 @@ void radv_CmdDispatch( radeon_emit(cmd_buffer->cs, 1); assert(cmd_buffer->cs->cdw <= cdw_max); - radv_cmd_buffer_trace_emit(cmd_buffer); } void radv_CmdDispatchIndirect( @@ -2815,44 +1976,35 @@ void radv_CmdDispatchIndirect( cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, buffer->bo, 8); - radv_flush_compute_state(cmd_buffer); - - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 25); - struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline, - MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); - if (loc->sgpr_idx != -1) { - for (unsigned i = 0; i < 3; ++i) { - radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG)); - radeon_emit(cmd_buffer->cs, (va + 4 * i)); - radeon_emit(cmd_buffer->cs, (va + 4 * i) >> 32); - radeon_emit(cmd_buffer->cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4) >> 2) + i); - radeon_emit(cmd_buffer->cs, 0); - } - } + radv_emit_compute_pipeline(cmd_buffer); + radv_flush_constants(cmd_buffer, cmd_buffer->state.compute_pipeline->layout, + VK_SHADER_STAGE_COMPUTE_BIT); + si_emit_cache_flush(cmd_buffer); - if (radv_cmd_buffer_uses_mec(cmd_buffer)) { - radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) | - PKT3_SHADER_TYPE_S(1)); - radeon_emit(cmd_buffer->cs, va); - radeon_emit(cmd_buffer->cs, va >> 32); - radeon_emit(cmd_buffer->cs, 1); - } else { - radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_BASE, 2, 0) | - PKT3_SHADER_TYPE_S(1)); - radeon_emit(cmd_buffer->cs, 1); - radeon_emit(cmd_buffer->cs, va); - radeon_emit(cmd_buffer->cs, va >> 32); + unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 25); - radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, 0) | - PKT3_SHADER_TYPE_S(1)); + for (unsigned i = 0; i < 3; ++i) { + radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_REG)); + radeon_emit(cmd_buffer->cs, (va + 4 * i)); + radeon_emit(cmd_buffer->cs, (va + 4 * i) >> 32); + radeon_emit(cmd_buffer->cs, ((R_00B900_COMPUTE_USER_DATA_0 + AC_USERDATA_CS_GRID_SIZE * 4) >> 2) + i); radeon_emit(cmd_buffer->cs, 0); - radeon_emit(cmd_buffer->cs, 1); } + radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_BASE, 2, 0) | + PKT3_SHADER_TYPE_S(1)); + radeon_emit(cmd_buffer->cs, 1); + radeon_emit(cmd_buffer->cs, va); + radeon_emit(cmd_buffer->cs, va >> 32); + + radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, 0) | + PKT3_SHADER_TYPE_S(1)); + radeon_emit(cmd_buffer->cs, 0); + radeon_emit(cmd_buffer->cs, 1); + assert(cmd_buffer->cs->cdw <= cdw_max); - radv_cmd_buffer_trace_emit(cmd_buffer); } void radv_unaligned_dispatch( @@ -2874,9 +2026,11 @@ void radv_unaligned_dispatch( remainder[1] = y + compute_shader->info.cs.block_size[1] - align_u32_npot(y, 
compute_shader->info.cs.block_size[1]); remainder[2] = z + compute_shader->info.cs.block_size[2] - align_u32_npot(z, compute_shader->info.cs.block_size[2]); - radv_flush_compute_state(cmd_buffer); - - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15); + radv_emit_compute_pipeline(cmd_buffer); + radv_flush_constants(cmd_buffer, cmd_buffer->state.compute_pipeline->layout, + VK_SHADER_STAGE_COMPUTE_BIT); + si_emit_cache_flush(cmd_buffer); + unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15); radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); radeon_emit(cmd_buffer->cs, @@ -2889,14 +2043,11 @@ void radv_unaligned_dispatch( S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2]) | S_00B81C_NUM_THREAD_PARTIAL(remainder[2])); - struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline, - MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); - if (loc->sgpr_idx != -1) { - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3); - radeon_emit(cmd_buffer->cs, blocks[0]); - radeon_emit(cmd_buffer->cs, blocks[1]); - radeon_emit(cmd_buffer->cs, blocks[2]); - } + radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + AC_USERDATA_CS_GRID_SIZE * 4, 3); + radeon_emit(cmd_buffer->cs, blocks[0]); + radeon_emit(cmd_buffer->cs, blocks[1]); + radeon_emit(cmd_buffer->cs, blocks[2]); + radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) | PKT3_SHADER_TYPE_S(1)); radeon_emit(cmd_buffer->cs, blocks[0]); @@ -2906,7 +2057,6 @@ void radv_unaligned_dispatch( S_00B800_PARTIAL_TG_EN(1)); assert(cmd_buffer->cs->cdw <= cdw_max); - radv_cmd_buffer_trace_emit(cmd_buffer); } void radv_CmdEndRenderPass( @@ -2916,6 +2066,7 @@ void radv_CmdEndRenderPass( radv_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier); + si_emit_cache_flush(cmd_buffer); radv_cmd_buffer_resolve_subpass(cmd_buffer); for (unsigned i = 0; i < cmd_buffer->state.framebuffer->attachment_count; ++i) { @@ -2934,32 +2085,26 @@ void radv_CmdEndRenderPass( static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - const VkImageSubresourceRange *range) + struct radv_image *image) { - assert(range->baseMipLevel == 0); - assert(range->levelCount == 1 || range->levelCount == VK_REMAINING_ARRAY_LAYERS); - unsigned layer_count = radv_get_layerCount(image, range); - uint64_t size = image->surface.htile_slice_size * layer_count; - uint64_t offset = image->offset + image->htile_offset + - image->surface.htile_slice_size * range->baseArrayLayer; cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; - radv_fill_buffer(cmd_buffer, image->bo, offset, size, 0xffffffff); + radv_fill_buffer(cmd_buffer, image->bo, image->offset + image->htile.offset, + image->htile.size, 0xffffffff); cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; + RADV_CMD_FLAG_INV_GLOBAL_L2; } static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout src_layout, VkImageLayout dst_layout, - const VkImageSubresourceRange *range, + VkImageSubresourceRange range, VkImageAspectFlags pending_clears) { if (dst_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL && @@ -2972,26 +2117,20 @@ static void radv_handle_depth_image_transition(struct 
radv_cmd_buffer *cmd_buffe } else if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED && radv_layout_has_htile(image, dst_layout)) { /* TODO: merge with the clear if applicable */ - radv_initialize_htile(cmd_buffer, image, range); + radv_initialize_htile(cmd_buffer, image); } else if (!radv_layout_has_htile(image, src_layout) && radv_layout_has_htile(image, dst_layout)) { - radv_initialize_htile(cmd_buffer, image, range); + radv_initialize_htile(cmd_buffer, image); } else if ((radv_layout_has_htile(image, src_layout) && !radv_layout_has_htile(image, dst_layout)) || (radv_layout_is_htile_compressed(image, src_layout) && !radv_layout_is_htile_compressed(image, dst_layout))) { - VkImageSubresourceRange local_range = *range; - local_range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - local_range.baseMipLevel = 0; - local_range.levelCount = 1; - - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | - RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; - radv_decompress_depth_image_inplace(cmd_buffer, image, &local_range); + range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + range.baseMipLevel = 0; + range.levelCount = 1; - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | - RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; + radv_decompress_depth_image_inplace(cmd_buffer, image, &range); } } @@ -3007,16 +2146,14 @@ void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer, cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; + RADV_CMD_FLAG_INV_GLOBAL_L2; } static void radv_handle_cmask_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout src_layout, VkImageLayout dst_layout, - unsigned src_queue_mask, - unsigned dst_queue_mask, - const VkImageSubresourceRange *range, + VkImageSubresourceRange range, VkImageAspectFlags pending_clears) { if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { @@ -3024,9 +2161,9 @@ static void radv_handle_cmask_image_transition(struct radv_cmd_buffer *cmd_buffe radv_initialise_cmask(cmd_buffer, image, 0xccccccccu); else radv_initialise_cmask(cmd_buffer, image, 0xffffffffu); - } else if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) && - !radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) { - radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); + } else if (radv_layout_has_cmask(image, src_layout) && + !radv_layout_has_cmask(image, dst_layout)) { + radv_fast_clear_flush_image_inplace(cmd_buffer, image); } } @@ -3044,23 +2181,21 @@ void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer, RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; + RADV_CMD_FLAG_INV_GLOBAL_L2; } static void radv_handle_dcc_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout src_layout, VkImageLayout dst_layout, - unsigned src_queue_mask, - unsigned dst_queue_mask, - const VkImageSubresourceRange *range, + VkImageSubresourceRange range, VkImageAspectFlags pending_clears) { if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { radv_initialize_dcc(cmd_buffer, image, 0x20202020u); - } else if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) && - !radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) { - radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); + } else if(src_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && + dst_layout != 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { + radv_fast_clear_flush_image_inplace(cmd_buffer, image); } } @@ -3068,46 +2203,20 @@ static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout src_layout, VkImageLayout dst_layout, - uint32_t src_family, - uint32_t dst_family, - const VkImageSubresourceRange *range, + VkImageSubresourceRange range, VkImageAspectFlags pending_clears) { - if (image->exclusive && src_family != dst_family) { - /* This is an acquire or a release operation and there will be - * a corresponding release/acquire. Do the transition in the - * most flexible queue. */ - - assert(src_family == cmd_buffer->queue_family_index || - dst_family == cmd_buffer->queue_family_index); - - if (cmd_buffer->queue_family_index == RADV_QUEUE_TRANSFER) - return; - - if (cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE && - (src_family == RADV_QUEUE_GENERAL || - dst_family == RADV_QUEUE_GENERAL)) - return; - } - - unsigned src_queue_mask = radv_image_queue_family_mask(image, src_family, cmd_buffer->queue_family_index); - unsigned dst_queue_mask = radv_image_queue_family_mask(image, dst_family, cmd_buffer->queue_family_index); - - if (image->surface.htile_size) + if (image->htile.size) radv_handle_depth_image_transition(cmd_buffer, image, src_layout, dst_layout, range, pending_clears); if (image->cmask.size) radv_handle_cmask_image_transition(cmd_buffer, image, src_layout, - dst_layout, src_queue_mask, - dst_queue_mask, range, - pending_clears); + dst_layout, range, pending_clears); if (image->surface.dcc_size) radv_handle_dcc_image_transition(cmd_buffer, image, src_layout, - dst_layout, src_queue_mask, - dst_queue_mask, range, - pending_clears); + dst_layout, range, pending_clears); } void radv_CmdPipelineBarrier( @@ -3123,43 +2232,76 @@ void radv_CmdPipelineBarrier( const VkImageMemoryBarrier* pImageMemoryBarriers) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - enum radv_cmd_flush_bits src_flush_bits = 0; - enum radv_cmd_flush_bits dst_flush_bits = 0; - + VkAccessFlags src_flags = 0; + VkAccessFlags dst_flags = 0; + uint32_t b; for (uint32_t i = 0; i < memoryBarrierCount; i++) { - src_flush_bits |= radv_src_access_flush(cmd_buffer, pMemoryBarriers[i].srcAccessMask); - dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pMemoryBarriers[i].dstAccessMask, - NULL); + src_flags |= pMemoryBarriers[i].srcAccessMask; + dst_flags |= pMemoryBarriers[i].dstAccessMask; } for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { - src_flush_bits |= radv_src_access_flush(cmd_buffer, pBufferMemoryBarriers[i].srcAccessMask); - dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pBufferMemoryBarriers[i].dstAccessMask, - NULL); + src_flags |= pBufferMemoryBarriers[i].srcAccessMask; + dst_flags |= pBufferMemoryBarriers[i].dstAccessMask; } for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image); - src_flush_bits |= radv_src_access_flush(cmd_buffer, pImageMemoryBarriers[i].srcAccessMask); - dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pImageMemoryBarriers[i].dstAccessMask, - image); - } - - radv_stage_flush(cmd_buffer, srcStageMask); - cmd_buffer->state.flush_bits |= src_flush_bits; + src_flags |= pImageMemoryBarriers[i].srcAccessMask; + dst_flags |= pImageMemoryBarriers[i].dstAccessMask; - for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { - RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image); radv_handle_image_transition(cmd_buffer, 
image, pImageMemoryBarriers[i].oldLayout, pImageMemoryBarriers[i].newLayout, - pImageMemoryBarriers[i].srcQueueFamilyIndex, - pImageMemoryBarriers[i].dstQueueFamilyIndex, - &pImageMemoryBarriers[i].subresourceRange, + pImageMemoryBarriers[i].subresourceRange, 0); } - cmd_buffer->state.flush_bits |= dst_flush_bits; + enum radv_cmd_flush_bits flush_bits = 0; + + for_each_bit(b, src_flags) { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_SHADER_WRITE_BIT: + flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2; + break; + case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; + break; + case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; + break; + case VK_ACCESS_TRANSFER_WRITE_BIT: + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; + break; + default: + break; + } + } + + for_each_bit(b, dst_flags) { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: + case VK_ACCESS_INDEX_READ_BIT: + case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: + case VK_ACCESS_UNIFORM_READ_BIT: + flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1; + break; + case VK_ACCESS_SHADER_READ_BIT: + flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2; + break; + case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: + case VK_ACCESS_TRANSFER_READ_BIT: + case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT: + flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | RADV_CMD_FLAG_INV_GLOBAL_L2; + default: + break; + } + } + + flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | + RADV_CMD_FLAG_PS_PARTIAL_FLUSH; + + cmd_buffer->state.flush_bits |= flush_bits; } @@ -3173,12 +2315,12 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer, cmd_buffer->device->ws->cs_add_buffer(cs, event->bo, 8); - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12); + unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12); /* TODO: this is overkill. Probably should figure something out from * the stage mask. 
*/ - if (cmd_buffer->device->physical_device->rad_info.chip_class == CIK) { + if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class == CIK) { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5)); @@ -3240,7 +2382,7 @@ void radv_CmdWaitEvents(VkCommandBuffer commandBuffer, cmd_buffer->device->ws->cs_add_buffer(cs, event->bo, 8); - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7); + unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7); radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); @@ -3260,9 +2402,7 @@ void radv_CmdWaitEvents(VkCommandBuffer commandBuffer, radv_handle_image_transition(cmd_buffer, image, pImageMemoryBarriers[i].oldLayout, pImageMemoryBarriers[i].newLayout, - pImageMemoryBarriers[i].srcQueueFamilyIndex, - pImageMemoryBarriers[i].dstQueueFamilyIndex, - &pImageMemoryBarriers[i].subresourceRange, + pImageMemoryBarriers[i].subresourceRange, 0); } diff --git a/lib/mesa/src/amd/vulkan/radv_cs.h b/lib/mesa/src/amd/vulkan/radv_cs.h index 0990270f5..2c8935f30 100644 --- a/lib/mesa/src/amd/vulkan/radv_cs.h +++ b/lib/mesa/src/amd/vulkan/radv_cs.h @@ -43,7 +43,6 @@ static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsign { assert(reg < R600_CONTEXT_REG_OFFSET); assert(cs->cdw + 2 + num <= cs->max_dw); - assert(num); radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0)); radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2); } @@ -58,7 +57,6 @@ static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsig { assert(reg >= R600_CONTEXT_REG_OFFSET); assert(cs->cdw + 2 + num <= cs->max_dw); - assert(num); radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0)); radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2); } @@ -85,7 +83,6 @@ static inline void radeon_set_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned r { assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END); assert(cs->cdw + 2 + num <= cs->max_dw); - assert(num); radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0)); radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2); } @@ -100,7 +97,6 @@ static inline void radeon_set_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsig { assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); assert(cs->cdw + 2 + num <= cs->max_dw); - assert(num); radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0)); radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2); } diff --git a/lib/mesa/src/amd/vulkan/radv_descriptor_set.c b/lib/mesa/src/amd/vulkan/radv_descriptor_set.c index 48cb8c2a3..eb8b5d6e3 100644 --- a/lib/mesa/src/amd/vulkan/radv_descriptor_set.c +++ b/lib/mesa/src/amd/vulkan/radv_descriptor_set.c @@ -50,19 +50,18 @@ VkResult radv_CreateDescriptorSetLayout( immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount; } - uint32_t samplers_offset = sizeof(struct radv_descriptor_set_layout) + - (max_binding + 1) * sizeof(set_layout->binding[0]); - size_t size = samplers_offset + immutable_sampler_count * 4 * sizeof(uint32_t); + size_t size = sizeof(struct radv_descriptor_set_layout) + + (max_binding + 1) * sizeof(set_layout->binding[0]) + + immutable_sampler_count * sizeof(struct radv_sampler *); set_layout = vk_alloc2(&device->alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!set_layout) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - set_layout->flags = pCreateInfo->flags; - /* We just allocate all the samplers at the end of the struct */ 
- uint32_t *samplers = (uint32_t*)&set_layout->binding[max_binding + 1]; + struct radv_sampler **samplers = + (struct radv_sampler **)&set_layout->binding[max_binding + 1]; set_layout->binding_count = max_binding + 1; set_layout->shader_stages = 0; @@ -81,7 +80,6 @@ VkResult radv_CreateDescriptorSetLayout( switch (binding->descriptorType) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - assert(!(pCreateInfo->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); set_layout->binding[b].dynamic_offset_count = 1; set_layout->dynamic_shader_stages |= binding->stageFlags; set_layout->binding[b].size = 0; @@ -127,32 +125,23 @@ VkResult radv_CreateDescriptorSetLayout( set_layout->binding[b].buffer_offset = buffer_count; set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count; - if (binding->pImmutableSamplers) { - set_layout->binding[b].immutable_samplers_offset = samplers_offset; - set_layout->binding[b].immutable_samplers_equal = true; + set_layout->size += binding->descriptorCount * set_layout->binding[b].size; + buffer_count += binding->descriptorCount * set_layout->binding[b].buffer_count; + dynamic_offset_count += binding->descriptorCount * + set_layout->binding[b].dynamic_offset_count; + + if (binding->pImmutableSamplers) { + set_layout->binding[b].immutable_samplers = samplers; + samplers += binding->descriptorCount; for (uint32_t i = 0; i < binding->descriptorCount; i++) - memcpy(samplers + 4 * i, &radv_sampler_from_handle(binding->pImmutableSamplers[i])->state, 16); - for (uint32_t i = 1; i < binding->descriptorCount; i++) - if (memcmp(samplers + 4 * i, samplers, 16) != 0) - set_layout->binding[b].immutable_samplers_equal = false; - - /* Don't reserve space for the samplers if they're not accessed. 
*/ - if (set_layout->binding[b].immutable_samplers_equal) { - if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) - set_layout->binding[b].size -= 32; - else if (binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) - set_layout->binding[b].size -= 16; - } - samplers += 4 * binding->descriptorCount; - samplers_offset += 4 * sizeof(uint32_t) * binding->descriptorCount; + set_layout->binding[b].immutable_samplers[i] = + radv_sampler_from_handle(binding->pImmutableSamplers[i]); + } else { + set_layout->binding[b].immutable_samplers = NULL; } - set_layout->size += binding->descriptorCount * set_layout->binding[b].size; - buffer_count += binding->descriptorCount * set_layout->binding[b].buffer_count; - dynamic_offset_count += binding->descriptorCount * - set_layout->binding[b].dynamic_offset_count; set_layout->shader_stages |= binding->stageFlags; } @@ -191,7 +180,7 @@ VkResult radv_CreatePipelineLayout( { RADV_FROM_HANDLE(radv_device, device, _device); struct radv_pipeline_layout *layout; - struct mesa_sha1 ctx; + struct mesa_sha1 *ctx; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); @@ -205,7 +194,7 @@ VkResult radv_CreatePipelineLayout( unsigned dynamic_offset_count = 0; - _mesa_sha1_init(&ctx); + ctx = _mesa_sha1_init(); for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) { RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[set]); @@ -214,11 +203,8 @@ VkResult radv_CreatePipelineLayout( layout->set[set].dynamic_offset_start = dynamic_offset_count; for (uint32_t b = 0; b < set_layout->binding_count; b++) { dynamic_offset_count += set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count; - if (set_layout->binding[b].immutable_samplers_offset) - _mesa_sha1_update(&ctx, radv_immutable_samplers(set_layout, set_layout->binding + b), - set_layout->binding[b].array_size * 4 * sizeof(uint32_t)); } - _mesa_sha1_update(&ctx, set_layout->binding, + _mesa_sha1_update(ctx, set_layout->binding, sizeof(set_layout->binding[0]) * set_layout->binding_count); } @@ -231,9 +217,9 @@ VkResult radv_CreatePipelineLayout( } layout->push_constant_size = align(layout->push_constant_size, 16); - _mesa_sha1_update(&ctx, &layout->push_constant_size, + _mesa_sha1_update(ctx, &layout->push_constant_size, sizeof(layout->push_constant_size)); - _mesa_sha1_final(&ctx, layout->sha1); + _mesa_sha1_final(ctx, layout->sha1); *pPipelineLayout = radv_pipeline_layout_to_handle(layout); return VK_SUCCESS; @@ -257,6 +243,7 @@ void radv_DestroyPipelineLayout( static VkResult radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_pool *pool, + struct radv_cmd_buffer *cmd_buffer, const struct radv_descriptor_set_layout *layout, struct radv_descriptor_set **out_set) { @@ -287,53 +274,72 @@ radv_descriptor_set_create(struct radv_device *device, if (layout->size) { uint32_t layout_size = align_u32(layout->size, 32); set->size = layout->size; + if (!cmd_buffer) { + if (pool->current_offset + layout_size <= pool->size) { + set->bo = pool->bo; + set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset); + set->va = device->ws->buffer_get_va(set->bo) + pool->current_offset; + pool->current_offset += layout_size; + + } else { + int entry = pool->free_list, prev_entry = -1; + uint32_t offset; + while (entry >= 0) { + if (pool->free_nodes[entry].size >= layout_size) { + if (prev_entry >= 0) + pool->free_nodes[prev_entry].next = pool->free_nodes[entry].next; + else + pool->free_list = 
pool->free_nodes[entry].next; + break; + } + prev_entry = entry; + entry = pool->free_nodes[entry].next; + } - /* try to allocate linearly first, so that we don't spend - * time looking for gaps if the app only allocates & - * resets via the pool. */ - if (pool->current_offset + layout_size <= pool->size) { - set->bo = pool->bo; - set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset); - set->va = device->ws->buffer_get_va(set->bo) + pool->current_offset; - pool->current_offset += layout_size; - list_addtail(&set->vram_list, &pool->vram_list); - } else { - uint64_t offset = 0; - struct list_head *prev = &pool->vram_list; - struct radv_descriptor_set *cur; - LIST_FOR_EACH_ENTRY(cur, &pool->vram_list, vram_list) { - uint64_t start = (uint8_t*)cur->mapped_ptr - pool->mapped_ptr; - if (start - offset >= layout_size) - break; - - offset = start + cur->size; - prev = &cur->vram_list; - } + if (entry < 0) { + vk_free2(&device->alloc, NULL, set); + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + offset = pool->free_nodes[entry].offset; + pool->free_nodes[entry].next = pool->full_list; + pool->full_list = entry; - if (pool->size - offset < layout_size) { + set->bo = pool->bo; + set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset); + set->va = device->ws->buffer_get_va(set->bo) + offset; + } + } else { + unsigned bo_offset; + if (!radv_cmd_buffer_upload_alloc(cmd_buffer, set->size, 32, + &bo_offset, + (void**)&set->mapped_ptr)) { vk_free2(&device->alloc, NULL, set->dynamic_descriptors); vk_free2(&device->alloc, NULL, set); - return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - set->bo = pool->bo; - set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset); - set->va = device->ws->buffer_get_va(set->bo) + offset; - list_add(&set->vram_list, prev); + + set->va = device->ws->buffer_get_va(cmd_buffer->upload.upload_bo); + set->va += bo_offset; } } + if (pool) + list_add(&set->descriptor_pool, &pool->descriptor_sets); + else + list_inithead(&set->descriptor_pool); + for (unsigned i = 0; i < layout->binding_count; ++i) { - if (!layout->binding[i].immutable_samplers_offset || - layout->binding[i].immutable_samplers_equal) + if (!layout->binding[i].immutable_samplers) continue; unsigned offset = layout->binding[i].offset / 4; if (layout->binding[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) offset += 16; - const uint32_t *samplers = (const uint32_t*)((const char*)layout + layout->binding[i].immutable_samplers_offset); for (unsigned j = 0; j < layout->binding[i].array_size; ++j) { - memcpy(set->mapped_ptr + offset, samplers + 4 * j, 16); + struct radv_sampler* sampler = layout->binding[i].immutable_samplers[j]; + + memcpy(set->mapped_ptr + offset, &sampler->state, 16); offset += layout->binding[i].size / 4; } @@ -348,13 +354,46 @@ radv_descriptor_set_destroy(struct radv_device *device, struct radv_descriptor_set *set, bool free_bo) { - if (free_bo && set->size) - list_del(&set->vram_list); + if (free_bo && set->size) { + assert(pool->full_list >= 0); + int next = pool->free_nodes[pool->full_list].next; + pool->free_nodes[pool->full_list].next = pool->free_list; + pool->free_nodes[pool->full_list].offset = (uint8_t*)set->mapped_ptr - pool->mapped_ptr; + pool->free_nodes[pool->full_list].size = align_u32(set->size, 32); + pool->free_list = pool->full_list; + pool->full_list = next; + } if (set->dynamic_descriptors) vk_free2(&device->alloc, NULL, set->dynamic_descriptors); + if (!list_empty(&set->descriptor_pool)) + 
list_del(&set->descriptor_pool); vk_free2(&device->alloc, NULL, set); } +VkResult +radv_temp_descriptor_set_create(struct radv_device *device, + struct radv_cmd_buffer *cmd_buffer, + VkDescriptorSetLayout _layout, + VkDescriptorSet *_set) +{ + RADV_FROM_HANDLE(radv_descriptor_set_layout, layout, _layout); + struct radv_descriptor_set *set; + VkResult ret; + + ret = radv_descriptor_set_create(device, NULL, cmd_buffer, layout, &set); + *_set = radv_descriptor_set_to_handle(set); + return ret; +} + +void +radv_temp_descriptor_set_destroy(struct radv_device *device, + VkDescriptorSet _set) +{ + RADV_FROM_HANDLE(radv_descriptor_set, set, _set); + + radv_descriptor_set_destroy(device, NULL, set, false); +} + VkResult radv_CreateDescriptorPool( VkDevice _device, const VkDescriptorPoolCreateInfo* pCreateInfo, @@ -363,7 +402,9 @@ VkResult radv_CreateDescriptorPool( { RADV_FROM_HANDLE(radv_device, device, _device); struct radv_descriptor_pool *pool; - int size = sizeof(struct radv_descriptor_pool); + unsigned max_sets = pCreateInfo->maxSets * 2; + int size = sizeof(struct radv_descriptor_pool) + + max_sets * sizeof(struct radv_descriptor_pool_free_node); uint64_t bo_size = 0; pool = vk_alloc2(&device->alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); @@ -372,6 +413,14 @@ VkResult radv_CreateDescriptorPool( memset(pool, 0, sizeof(*pool)); + pool->free_list = -1; + pool->full_list = 0; + pool->free_nodes[max_sets - 1].next = -1; + pool->max_sets = max_sets; + + for (int i = 0; i + 1 < max_sets; ++i) + pool->free_nodes[i].next = i + 1; + for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) { switch(pCreateInfo->pPoolSizes[i].type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: @@ -406,7 +455,7 @@ VkResult radv_CreateDescriptorPool( } pool->size = bo_size; - list_inithead(&pool->vram_list); + list_inithead(&pool->descriptor_sets); *pDescriptorPool = radv_descriptor_pool_to_handle(pool); return VK_SUCCESS; } @@ -423,7 +472,7 @@ void radv_DestroyDescriptorPool( return; list_for_each_entry_safe(struct radv_descriptor_set, set, - &pool->vram_list, vram_list) { + &pool->descriptor_sets, descriptor_pool) { radv_descriptor_set_destroy(device, pool, set, false); } @@ -441,13 +490,17 @@ VkResult radv_ResetDescriptorPool( RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool); list_for_each_entry_safe(struct radv_descriptor_set, set, - &pool->vram_list, vram_list) { + &pool->descriptor_sets, descriptor_pool) { radv_descriptor_set_destroy(device, pool, set, false); } - list_inithead(&pool->vram_list); - pool->current_offset = 0; + pool->free_list = -1; + pool->full_list = 0; + pool->free_nodes[pool->max_sets - 1].next = -1; + + for (int i = 0; i + 1 < pool->max_sets; ++i) + pool->free_nodes[i].next = i + 1; return VK_SUCCESS; } @@ -469,9 +522,7 @@ VkResult radv_AllocateDescriptorSets( RADV_FROM_HANDLE(radv_descriptor_set_layout, layout, pAllocateInfo->pSetLayouts[i]); - assert(!(layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); - - result = radv_descriptor_set_create(device, pool, layout, &set); + result = radv_descriptor_set_create(device, pool, NULL, layout, &set); if (result != VK_SUCCESS) break; @@ -503,7 +554,6 @@ VkResult radv_FreeDescriptorSets( } static void write_texel_buffer_descriptor(struct radv_device *device, - struct radv_cmd_buffer *cmd_buffer, unsigned *dst, struct radeon_winsys_bo **buffer_list, const VkBufferView _buffer_view) @@ -511,15 +561,10 @@ static void write_texel_buffer_descriptor(struct radv_device *device, 
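/* Sketch (not from the imported source): both sides of this hunk write the
 * same four-dword texel buffer descriptor via memcpy(); they differ only in
 * how the view's backing BO is kept resident. The removed lines add it to
 * the command stream with ws->cs_add_buffer() when a cmd_buffer is given,
 * while the imported 13.0.2 path always records it in the set's
 * buffer_list (set->descriptors) instead. */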
RADV_FROM_HANDLE(radv_buffer_view, buffer_view, _buffer_view); memcpy(dst, buffer_view->state, 4 * 4); - - if (cmd_buffer) - device->ws->cs_add_buffer(cmd_buffer->cs, buffer_view->bo, 7); - else - *buffer_list = buffer_view->bo; + *buffer_list = buffer_view->bo; } static void write_buffer_descriptor(struct radv_device *device, - struct radv_cmd_buffer *cmd_buffer, unsigned *dst, struct radeon_winsys_bo **buffer_list, const VkDescriptorBufferInfo *buffer_info) @@ -542,10 +587,7 @@ static void write_buffer_descriptor(struct radv_device *device, S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); - if (cmd_buffer) - device->ws->cs_add_buffer(cmd_buffer->cs, buffer->bo, 7); - else - *buffer_list = buffer->bo; + *buffer_list = buffer->bo; } static void write_dynamic_buffer_descriptor(struct radv_device *device, @@ -569,7 +611,6 @@ static void write_dynamic_buffer_descriptor(struct radv_device *device, static void write_image_descriptor(struct radv_device *device, - struct radv_cmd_buffer *cmd_buffer, unsigned *dst, struct radeon_winsys_bo **buffer_list, const VkDescriptorImageInfo *image_info) @@ -577,16 +618,11 @@ write_image_descriptor(struct radv_device *device, RADV_FROM_HANDLE(radv_image_view, iview, image_info->imageView); memcpy(dst, iview->descriptor, 8 * 4); memcpy(dst + 8, iview->fmask_descriptor, 8 * 4); - - if (cmd_buffer) - device->ws->cs_add_buffer(cmd_buffer->cs, iview->bo, 7); - else - *buffer_list = iview->bo; + *buffer_list = iview->bo; } static void write_combined_image_sampler_descriptor(struct radv_device *device, - struct radv_cmd_buffer *cmd_buffer, unsigned *dst, struct radeon_winsys_bo **buffer_list, const VkDescriptorImageInfo *image_info, @@ -594,7 +630,7 @@ write_combined_image_sampler_descriptor(struct radv_device *device, { RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler); - write_image_descriptor(device, cmd_buffer, dst, buffer_list, image_info); + write_image_descriptor(device, dst, buffer_list, image_info); /* copy over sampler state */ if (has_sampler) memcpy(dst + 16, sampler->state, 16); @@ -610,31 +646,22 @@ write_sampler_descriptor(struct radv_device *device, memcpy(dst, sampler->state, 16); } -void radv_update_descriptor_sets( - struct radv_device* device, - struct radv_cmd_buffer* cmd_buffer, - VkDescriptorSet dstSetOverride, +void radv_UpdateDescriptorSets( + VkDevice _device, uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t descriptorCopyCount, const VkCopyDescriptorSet* pDescriptorCopies) { + RADV_FROM_HANDLE(radv_device, device, _device); uint32_t i, j; for (i = 0; i < descriptorWriteCount; i++) { const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i]; - RADV_FROM_HANDLE(radv_descriptor_set, set, - dstSetOverride ? dstSetOverride : writeset->dstSet); + RADV_FROM_HANDLE(radv_descriptor_set, set, writeset->dstSet); const struct radv_descriptor_set_binding_layout *binding_layout = set->layout->binding + writeset->dstBinding; uint32_t *ptr = set->mapped_ptr; struct radeon_winsys_bo **buffer_list = set->descriptors; - /* Immutable samplers are not copied into push descriptors when they are - * allocated, so if we are writing push descriptors we have to copy the - * immutable samplers into them now. 
- */ - const bool copy_immutable_samplers = cmd_buffer && - binding_layout->immutable_samplers_offset && !binding_layout->immutable_samplers_equal; - const uint32_t *samplers = radv_immutable_samplers(set->layout, binding_layout); ptr += binding_layout->offset / 4; ptr += binding_layout->size * writeset->dstArrayElement / 4; @@ -646,44 +673,35 @@ void radv_update_descriptor_sets( case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { unsigned idx = writeset->dstArrayElement + j; idx += binding_layout->dynamic_offset_offset; - assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); write_dynamic_buffer_descriptor(device, set->dynamic_descriptors + idx, buffer_list, writeset->pBufferInfo + j); break; } case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - write_buffer_descriptor(device, cmd_buffer, ptr, buffer_list, + write_buffer_descriptor(device, ptr, buffer_list, writeset->pBufferInfo + j); break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - write_texel_buffer_descriptor(device, cmd_buffer, ptr, buffer_list, + write_texel_buffer_descriptor(device, ptr, buffer_list, writeset->pTexelBufferView[j]); break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - write_image_descriptor(device, cmd_buffer, ptr, buffer_list, + write_image_descriptor(device, ptr, buffer_list, writeset->pImageInfo + j); break; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - write_combined_image_sampler_descriptor(device, cmd_buffer, ptr, buffer_list, + write_combined_image_sampler_descriptor(device, ptr, buffer_list, writeset->pImageInfo + j, - !binding_layout->immutable_samplers_offset); - if (copy_immutable_samplers) { - const unsigned idx = writeset->dstArrayElement + j; - memcpy(ptr + 16, samplers + 4 * idx, 16); - } + !binding_layout->immutable_samplers); break; case VK_DESCRIPTOR_TYPE_SAMPLER: - if (!binding_layout->immutable_samplers_offset) { - write_sampler_descriptor(device, ptr, - writeset->pImageInfo + j); - } else if (copy_immutable_samplers) { - unsigned idx = writeset->dstArrayElement + j; - memcpy(ptr, samplers + 4 * idx, 16); - } + assert(!binding_layout->immutable_samplers); + write_sampler_descriptor(device, ptr, + writeset->pImageInfo + j); break; default: unreachable("unimplemented descriptor type"); @@ -697,180 +715,3 @@ void radv_update_descriptor_sets( if (descriptorCopyCount) radv_finishme("copy descriptors"); } - -void radv_UpdateDescriptorSets( - VkDevice _device, - uint32_t descriptorWriteCount, - const VkWriteDescriptorSet* pDescriptorWrites, - uint32_t descriptorCopyCount, - const VkCopyDescriptorSet* pDescriptorCopies) -{ - RADV_FROM_HANDLE(radv_device, device, _device); - - radv_update_descriptor_sets(device, NULL, VK_NULL_HANDLE, descriptorWriteCount, pDescriptorWrites, - descriptorCopyCount, pDescriptorCopies); -} - -VkResult radv_CreateDescriptorUpdateTemplateKHR(VkDevice _device, - const VkDescriptorUpdateTemplateCreateInfoKHR *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDescriptorUpdateTemplateKHR *pDescriptorUpdateTemplate) -{ - RADV_FROM_HANDLE(radv_device, device, _device); - RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, pCreateInfo->descriptorSetLayout); - const uint32_t entry_count = pCreateInfo->descriptorUpdateEntryCount; - const size_t size = sizeof(struct radv_descriptor_update_template) + - sizeof(struct radv_descriptor_update_template_entry) * entry_count; - struct 
radv_descriptor_update_template *templ; - uint32_t i; - - templ = vk_alloc2(&device->alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!templ) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - templ->entry_count = entry_count; - - for (i = 0; i < entry_count; i++) { - const VkDescriptorUpdateTemplateEntryKHR *entry = &pCreateInfo->pDescriptorUpdateEntries[i]; - const struct radv_descriptor_set_binding_layout *binding_layout = - set_layout->binding + entry->dstBinding; - const uint32_t buffer_offset = binding_layout->buffer_offset + - binding_layout->buffer_count * entry->dstArrayElement; - const uint32_t *immutable_samplers = NULL; - uint32_t dst_offset; - uint32_t dst_stride; - - /* dst_offset is an offset into dynamic_descriptors when the descriptor - is dynamic, and an offset into mapped_ptr otherwise */ - switch (entry->descriptorType) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - assert(pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR); - dst_offset = binding_layout->dynamic_offset_offset + entry->dstArrayElement; - dst_stride = 0; /* Not used */ - break; - default: - switch (entry->descriptorType) { - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLER: - /* Immutable samplers are copied into push descriptors when they are pushed */ - if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR && - binding_layout->immutable_samplers_offset && !binding_layout->immutable_samplers_equal) { - immutable_samplers = radv_immutable_samplers(set_layout, binding_layout) + entry->dstArrayElement * 4; - } - break; - default: - break; - } - dst_offset = binding_layout->offset / 4 + binding_layout->size * entry->dstArrayElement / 4; - dst_stride = binding_layout->size / 4; - break; - } - - templ->entry[i] = (struct radv_descriptor_update_template_entry) { - .descriptor_type = entry->descriptorType, - .descriptor_count = entry->descriptorCount, - .src_offset = entry->offset, - .src_stride = entry->stride, - .dst_offset = dst_offset, - .dst_stride = dst_stride, - .buffer_offset = buffer_offset, - .buffer_count = binding_layout->buffer_count, - .has_sampler = !binding_layout->immutable_samplers_offset, - .immutable_samplers = immutable_samplers - }; - } - - *pDescriptorUpdateTemplate = radv_descriptor_update_template_to_handle(templ); - return VK_SUCCESS; -} - -void radv_DestroyDescriptorUpdateTemplateKHR(VkDevice _device, - VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, - const VkAllocationCallbacks *pAllocator) -{ - RADV_FROM_HANDLE(radv_device, device, _device); - RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate); - - if (!templ) - return; - - vk_free2(&device->alloc, pAllocator, templ); -} - -void radv_update_descriptor_set_with_template(struct radv_device *device, - struct radv_cmd_buffer *cmd_buffer, - struct radv_descriptor_set *set, - VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, - const void *pData) -{ - RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate); - uint32_t i; - - for (i = 0; i < templ->entry_count; ++i) { - struct radeon_winsys_bo **buffer_list = set->descriptors + templ->entry[i].buffer_offset; - uint32_t *pDst = set->mapped_ptr + templ->entry[i].dst_offset; - const uint8_t *pSrc = ((const uint8_t *) pData) + templ->entry[i].src_offset; - uint32_t j; - - for (j = 0; j < templ->entry[i].descriptor_count; ++j) { - switch 
(templ->entry[i].descriptor_type) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { - const unsigned idx = templ->entry[i].dst_offset + j; - assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); - write_dynamic_buffer_descriptor(device, set->dynamic_descriptors + idx, - buffer_list, (struct VkDescriptorBufferInfo *) pSrc); - break; - } - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - write_buffer_descriptor(device, cmd_buffer, pDst, buffer_list, - (struct VkDescriptorBufferInfo *) pSrc); - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - write_texel_buffer_descriptor(device, cmd_buffer, pDst, buffer_list, - *(VkBufferView *) pSrc); - break; - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - write_image_descriptor(device, cmd_buffer, pDst, buffer_list, - (struct VkDescriptorImageInfo *) pSrc); - break; - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - write_combined_image_sampler_descriptor(device, cmd_buffer, pDst, buffer_list, - (struct VkDescriptorImageInfo *) pSrc, - templ->entry[i].has_sampler); - if (templ->entry[i].immutable_samplers) - memcpy(pDst + 16, templ->entry[i].immutable_samplers + 4 * j, 16); - break; - case VK_DESCRIPTOR_TYPE_SAMPLER: - if (templ->entry[i].has_sampler) - write_sampler_descriptor(device, pDst, - (struct VkDescriptorImageInfo *) pSrc); - else if (templ->entry[i].immutable_samplers) - memcpy(pDst, templ->entry[i].immutable_samplers + 4 * j, 16); - break; - default: - unreachable("unimplemented descriptor type"); - break; - } - pSrc += templ->entry[i].src_stride; - pDst += templ->entry[i].dst_stride; - buffer_list += templ->entry[i].buffer_count; - } - } -} - -void radv_UpdateDescriptorSetWithTemplateKHR(VkDevice _device, - VkDescriptorSet descriptorSet, - VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, - const void *pData) -{ - RADV_FROM_HANDLE(radv_device, device, _device); - RADV_FROM_HANDLE(radv_descriptor_set, set, descriptorSet); - - radv_update_descriptor_set_with_template(device, NULL, set, descriptorUpdateTemplate, pData); -} diff --git a/lib/mesa/src/amd/vulkan/radv_descriptor_set.h b/lib/mesa/src/amd/vulkan/radv_descriptor_set.h index a9f4bc649..067482275 100644 --- a/lib/mesa/src/amd/vulkan/radv_descriptor_set.h +++ b/lib/mesa/src/amd/vulkan/radv_descriptor_set.h @@ -32,39 +32,34 @@ struct radv_descriptor_set_binding_layout { VkDescriptorType type; /* Number of array elements in this binding */ - uint32_t array_size; + uint16_t array_size; - uint32_t offset; - uint32_t buffer_offset; + uint16_t offset; + uint16_t buffer_offset; uint16_t dynamic_offset_offset; /* redundant with the type, each for a single array element */ - uint32_t size; - uint32_t buffer_count; + uint16_t size; + uint16_t buffer_count; uint16_t dynamic_offset_count; - /* Offset in the radv_descriptor_set_layout of the immutable samplers, or 0 - * if there are no immutable samplers. 
*/ - uint32_t immutable_samplers_offset; - bool immutable_samplers_equal; + /* Immutable samplers (or NULL if no immutable samplers) */ + struct radv_sampler **immutable_samplers; }; struct radv_descriptor_set_layout { - /* The create flags for this descriptor set layout */ - VkDescriptorSetLayoutCreateFlags flags; - /* Number of bindings in this descriptor set */ - uint32_t binding_count; + uint16_t binding_count; /* Total size of the descriptor set with room for all array entries */ - uint32_t size; + uint16_t size; /* Shader stages affected by this descriptor set */ uint16_t shader_stages; uint16_t dynamic_shader_stages; /* Number of buffers in this descriptor set */ - uint32_t buffer_count; + uint16_t buffer_count; /* Number of dynamic offsets used by this descriptor set */ uint16_t dynamic_offset_count; @@ -87,9 +82,4 @@ struct radv_pipeline_layout { unsigned char sha1[20]; }; -static inline const uint32_t * -radv_immutable_samplers(const struct radv_descriptor_set_layout *set, - const struct radv_descriptor_set_binding_layout *binding) { - return (const uint32_t*)((const char*)set + binding->immutable_samplers_offset); -} #endif /* RADV_DESCRIPTOR_SET_H */ diff --git a/lib/mesa/src/amd/vulkan/radv_device.c b/lib/mesa/src/amd/vulkan/radv_device.c index 33c75c2a3..94a2ef006 100644 --- a/lib/mesa/src/amd/vulkan/radv_device.c +++ b/lib/mesa/src/amd/vulkan/radv_device.c @@ -30,10 +30,8 @@ #include <unistd.h> #include <fcntl.h> #include "radv_private.h" -#include "radv_cs.h" -#include "util/disk_cache.h" #include "util/strtod.h" -#include "util/vk_util.h" + #include <xf86drm.h> #include <amdgpu.h> #include <amdgpu_drm.h> @@ -42,150 +40,9 @@ #include "ac_llvm_util.h" #include "vk_format.h" #include "sid.h" +#include "radv_timestamp.h" #include "util/debug.h" - -static int -radv_device_get_cache_uuid(enum radeon_family family, void *uuid) -{ - uint32_t mesa_timestamp, llvm_timestamp; - uint16_t f = family; - memset(uuid, 0, VK_UUID_SIZE); - if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) || - !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp)) - return -1; - - memcpy(uuid, &mesa_timestamp, 4); - memcpy((char*)uuid + 4, &llvm_timestamp, 4); - memcpy((char*)uuid + 8, &f, 2); - snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv"); - return 0; -} - -static const VkExtensionProperties instance_extensions[] = { - { - .extensionName = VK_KHR_SURFACE_EXTENSION_NAME, - .specVersion = 25, - }, -#ifdef VK_USE_PLATFORM_XCB_KHR - { - .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME, - .specVersion = 6, - }, -#endif -#ifdef VK_USE_PLATFORM_XLIB_KHR - { - .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME, - .specVersion = 6, - }, -#endif -#ifdef VK_USE_PLATFORM_WAYLAND_KHR - { - .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, - .specVersion = 6, - }, -#endif - { - .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, - .specVersion = 1, - }, -}; - -static const VkExtensionProperties common_device_extensions[] = { - { - .extensionName = VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, - .specVersion = 1, - }, - { - .extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME, - .specVersion = 1, - }, - { - .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME, - .specVersion = 1, - }, - { - .extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, - .specVersion = 1, - }, - { - .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, - .specVersion = 1, - }, - { - .extensionName = 
VK_KHR_SWAPCHAIN_EXTENSION_NAME, - .specVersion = 68, - }, - { - .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME, - .specVersion = 1, - }, - { - .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, - .specVersion = 1, - }, - { - .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME, - .specVersion = 1, - }, -}; - -static VkResult -radv_extensions_register(struct radv_instance *instance, - struct radv_extensions *extensions, - const VkExtensionProperties *new_ext, - uint32_t num_ext) -{ - size_t new_size; - VkExtensionProperties *new_ptr; - - assert(new_ext && num_ext > 0); - - if (!new_ext) - return VK_ERROR_INITIALIZATION_FAILED; - - new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties); - new_ptr = vk_realloc(&instance->alloc, extensions->ext_array, - new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - - /* Old array continues to be valid, update nothing */ - if (!new_ptr) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - memcpy(&new_ptr[extensions->num_ext], new_ext, - num_ext * sizeof(VkExtensionProperties)); - extensions->ext_array = new_ptr; - extensions->num_ext += num_ext; - - return VK_SUCCESS; -} - -static void -radv_extensions_finish(struct radv_instance *instance, - struct radv_extensions *extensions) -{ - assert(extensions); - - if (!extensions) - radv_loge("Attemted to free invalid extension struct\n"); - - if (extensions->ext_array) - vk_free(&instance->alloc, extensions->ext_array); -} - -static bool -is_extension_enabled(const VkExtensionProperties *extensions, - size_t num_ext, - const char *name) -{ - assert(extensions && name); - - for (uint32_t i = 0; i < num_ext; i++) { - if (strcmp(name, extensions[i].extensionName) == 0) - return true; - } - - return false; -} +struct radv_dispatch_table dtable; static VkResult radv_physical_device_init(struct radv_physical_device *device, @@ -198,7 +55,8 @@ radv_physical_device_init(struct radv_physical_device *device, fd = open(path, O_RDWR | O_CLOEXEC); if (fd < 0) - return VK_ERROR_INCOMPATIBLE_DRIVER; + return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, + "failed to open %s: %m", path); version = drmGetVersion(fd); if (!version) { @@ -219,13 +77,11 @@ radv_physical_device_init(struct radv_physical_device *device, assert(strlen(path) < ARRAY_SIZE(device->path)); strncpy(device->path, path, ARRAY_SIZE(device->path)); - device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags); + device->ws = radv_amdgpu_winsys_create(fd); if (!device->ws) { result = VK_ERROR_INCOMPATIBLE_DRIVER; goto fail; } - - device->local_fd = fd; device->ws->query_info(device->ws, &device->rad_info); result = radv_init_wsi(device); if (result != VK_SUCCESS) { @@ -233,24 +89,8 @@ radv_physical_device_init(struct radv_physical_device *device, goto fail; } - if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) { - radv_finish_wsi(device); - device->ws->destroy(device->ws); - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "cannot generate UUID"); - goto fail; - } - - result = radv_extensions_register(instance, - &device->extensions, - common_device_extensions, - ARRAY_SIZE(common_device_extensions)); - if (result != VK_SUCCESS) - goto fail; - fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n"); device->name = device->rad_info.name; - return VK_SUCCESS; fail: @@ -261,12 +101,41 @@ fail: static void radv_physical_device_finish(struct radv_physical_device *device) { - radv_extensions_finish(device->instance, &device->extensions); radv_finish_wsi(device); 
device->ws->destroy(device->ws); - close(device->local_fd); } +static const VkExtensionProperties global_extensions[] = { + { + .extensionName = VK_KHR_SURFACE_EXTENSION_NAME, + .specVersion = 25, + }, +#ifdef VK_USE_PLATFORM_XCB_KHR + { + .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME, + .specVersion = 6, + }, +#endif +#ifdef VK_USE_PLATFORM_XLIB_KHR + { + .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME, + .specVersion = 6, + }, +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + { + .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, + .specVersion = 5, + }, +#endif +}; + +static const VkExtensionProperties device_extensions[] = { + { + .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME, + .specVersion = 68, + }, +}; static void * default_alloc_func(void *pUserData, size_t size, size_t align, @@ -295,20 +164,6 @@ static const VkAllocationCallbacks default_alloc = { .pfnFree = default_free_func, }; -static const struct debug_control radv_debug_options[] = { - {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS}, - {"nodcc", RADV_DEBUG_NO_DCC}, - {"shaders", RADV_DEBUG_DUMP_SHADERS}, - {"nocache", RADV_DEBUG_NO_CACHE}, - {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS}, - {"nohiz", RADV_DEBUG_NO_HIZ}, - {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE}, - {"unsafemath", RADV_DEBUG_UNSAFE_MATH}, - {"allbos", RADV_DEBUG_ALL_BOS}, - {"noibs", RADV_DEBUG_NO_IBS}, - {NULL, 0} -}; - VkResult radv_CreateInstance( const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, @@ -336,9 +191,15 @@ VkResult radv_CreateInstance( } for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { - if (!is_extension_enabled(instance_extensions, - ARRAY_SIZE(instance_extensions), - pCreateInfo->ppEnabledExtensionNames[i])) + bool found = false; + for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + global_extensions[j].extensionName) == 0) { + found = true; + break; + } + } + if (!found) return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); } @@ -347,8 +208,6 @@ VkResult radv_CreateInstance( if (!instance) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - memset(instance, 0, sizeof(*instance)); - instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; if (pAllocator) @@ -363,9 +222,6 @@ VkResult radv_CreateInstance( VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); - instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"), - radv_debug_options); - *pInstance = radv_instance_to_handle(instance); return VK_SUCCESS; @@ -377,11 +233,10 @@ void radv_DestroyInstance( { RADV_FROM_HANDLE(radv_instance, instance, _instance); - if (!instance) - return; - - for (int i = 0; i < instance->physicalDeviceCount; ++i) { - radv_physical_device_finish(instance->physicalDevices + i); + if (instance->physicalDeviceCount > 0) { + /* We support at most one physical device. */ + assert(instance->physicalDeviceCount == 1); + radv_physical_device_finish(&instance->physicalDevice); } VG(VALGRIND_DESTROY_MEMPOOL(instance)); @@ -391,40 +246,6 @@ void radv_DestroyInstance( vk_free(&instance->alloc, instance); } -static VkResult -radv_enumerate_devices(struct radv_instance *instance) -{ - /* TODO: Check for more devices ? 
*/ - drmDevicePtr devices[8]; - VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER; - int max_devices; - - instance->physicalDeviceCount = 0; - - max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices)); - if (max_devices < 1) - return VK_ERROR_INCOMPATIBLE_DRIVER; - - for (unsigned i = 0; i < (unsigned)max_devices; i++) { - if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER && - devices[i]->bustype == DRM_BUS_PCI && - devices[i]->deviceinfo.pci->vendor_id == 0x1002) { - - result = radv_physical_device_init(instance->physicalDevices + - instance->physicalDeviceCount, - instance, - devices[i]->nodes[DRM_NODE_RENDER]); - if (result == VK_SUCCESS) - ++instance->physicalDeviceCount; - else if (result != VK_ERROR_INCOMPATIBLE_DRIVER) - break; - } - } - drmFreeDevices(devices, max_devices); - - return result; -} - VkResult radv_EnumeratePhysicalDevices( VkInstance _instance, uint32_t* pPhysicalDeviceCount, @@ -434,22 +255,53 @@ VkResult radv_EnumeratePhysicalDevices( VkResult result; if (instance->physicalDeviceCount < 0) { - result = radv_enumerate_devices(instance); - if (result != VK_SUCCESS && - result != VK_ERROR_INCOMPATIBLE_DRIVER) + char path[20]; + for (unsigned i = 0; i < 8; i++) { + snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i); + result = radv_physical_device_init(&instance->physicalDevice, + instance, path); + if (result != VK_ERROR_INCOMPATIBLE_DRIVER) + break; + } + + if (result == VK_ERROR_INCOMPATIBLE_DRIVER) { + instance->physicalDeviceCount = 0; + } else if (result == VK_SUCCESS) { + instance->physicalDeviceCount = 1; + } else { return result; + } } + /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL; + * otherwise it's an inout parameter. + * + * The Vulkan spec (git aaed022) says: + * + * pPhysicalDeviceCount is a pointer to an unsigned integer variable + * that is initialized with the number of devices the application is + * prepared to receive handles to. pname:pPhysicalDevices is pointer to + * an array of at least this many VkPhysicalDevice handles [...]. + * + * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices + * overwrites the contents of the variable pointed to by + * pPhysicalDeviceCount with the number of physical devices in in the + * instance; otherwise, vkEnumeratePhysicalDevices overwrites + * pPhysicalDeviceCount with the number of physical handles written to + * pPhysicalDevices. + */ if (!pPhysicalDevices) { *pPhysicalDeviceCount = instance->physicalDeviceCount; + } else if (*pPhysicalDeviceCount >= 1) { + pPhysicalDevices[0] = radv_physical_device_to_handle(&instance->physicalDevice); + *pPhysicalDeviceCount = 1; + } else if (*pPhysicalDeviceCount < instance->physicalDeviceCount) { + return VK_INCOMPLETE; } else { - *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount); - for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i) - pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i); + *pPhysicalDeviceCount = 0; } - return *pPhysicalDeviceCount < instance->physicalDeviceCount ? 
VK_INCOMPLETE - : VK_SUCCESS; + return VK_SUCCESS; } void radv_GetPhysicalDeviceFeatures( @@ -465,8 +317,8 @@ void radv_GetPhysicalDeviceFeatures( .fullDrawIndexUint32 = true, .imageCubeArray = true, .independentBlend = true, - .geometryShader = true, - .tessellationShader = true, + .geometryShader = false, + .tessellationShader = false, .sampleRateShading = false, .dualSrcBlend = true, .logicOp = true, @@ -479,63 +331,41 @@ void radv_GetPhysicalDeviceFeatures( .wideLines = true, .largePoints = true, .alphaToOne = true, - .multiViewport = true, - .samplerAnisotropy = true, + .multiViewport = false, + .samplerAnisotropy = false, /* FINISHME */ .textureCompressionETC2 = false, .textureCompressionASTC_LDR = false, .textureCompressionBC = true, .occlusionQueryPrecise = true, - .pipelineStatisticsQuery = true, + .pipelineStatisticsQuery = false, .vertexPipelineStoresAndAtomics = true, .fragmentStoresAndAtomics = true, .shaderTessellationAndGeometryPointSize = true, - .shaderImageGatherExtended = true, - .shaderStorageImageExtendedFormats = true, + .shaderImageGatherExtended = false, + .shaderStorageImageExtendedFormats = false, .shaderStorageImageMultisample = false, .shaderUniformBufferArrayDynamicIndexing = true, .shaderSampledImageArrayDynamicIndexing = true, .shaderStorageBufferArrayDynamicIndexing = true, .shaderStorageImageArrayDynamicIndexing = true, - .shaderStorageImageReadWithoutFormat = true, + .shaderStorageImageReadWithoutFormat = false, .shaderStorageImageWriteWithoutFormat = true, .shaderClipDistance = true, .shaderCullDistance = true, - .shaderFloat64 = true, + .shaderFloat64 = false, .shaderInt64 = false, .shaderInt16 = false, - .sparseBinding = true, - .variableMultisampleRate = true, - .inheritedQueries = true, + .alphaToOne = true, + .variableMultisampleRate = false, + .inheritedQueries = false, }; } -void radv_GetPhysicalDeviceFeatures2KHR( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceFeatures2KHR *pFeatures) -{ - return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features); -} - -static uint32_t radv_get_driver_version() -{ - const char *minor_string = strchr(VERSION, '.'); - const char *patch_string = minor_string ? strchr(minor_string + 1, ','): NULL; - int major = atoi(VERSION); - int minor = minor_string ? atoi(minor_string + 1) : 0; - int patch = patch_string ? atoi(patch_string + 1) : 0; - if (strstr(VERSION, "devel")) { - if (patch == 0) { - patch = 99; - if (minor == 0) { - minor = 99; - --major; - } else - --minor; - } else - --patch; - } - uint32_t version = VK_MAKE_VERSION(major, minor, patch); - return version; +void +radv_device_get_cache_uuid(void *uuid) +{ + memset(uuid, 0, VK_UUID_SIZE); + snprintf(uuid, VK_UUID_SIZE, "radv-%s", RADV_TIMESTAMP); } void radv_GetPhysicalDeviceProperties( @@ -544,20 +374,6 @@ void radv_GetPhysicalDeviceProperties( { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); VkSampleCountFlags sample_counts = 0xf; - - /* make sure that the entire descriptor set is addressable with a signed - * 32-bit int. So the sum of all limits scaled by descriptor size has to - * be at most 2 GiB. the combined image & samples object count as one of - * both. This limit is for the pipeline layout, not for the set layout, but - * there is no set limit, so we just set a pipeline limit. I don't think - * any app is going to hit this soon. 
*/ - size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) / - (32 /* uniform buffer, 32 due to potential space wasted on alignement */ + - 32 /* storage buffer, 32 due to potential space wasted on alignement */ + - 32 /* sampler, largest when combined with image */ + - 64 /* sampled image */ + - 64 /* storage image */); - VkPhysicalDeviceLimits limits = { .maxImageDimension1D = (1 << 14), .maxImageDimension2D = (1 << 14), @@ -571,52 +387,52 @@ void radv_GetPhysicalDeviceProperties( .maxMemoryAllocationCount = UINT32_MAX, .maxSamplerAllocationCount = 64 * 1024, .bufferImageGranularity = 64, /* A cache line */ - .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */ + .sparseAddressSpaceSize = 0, .maxBoundDescriptorSets = MAX_SETS, - .maxPerStageDescriptorSamplers = max_descriptor_set_size, - .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size, - .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size, - .maxPerStageDescriptorSampledImages = max_descriptor_set_size, - .maxPerStageDescriptorStorageImages = max_descriptor_set_size, - .maxPerStageDescriptorInputAttachments = max_descriptor_set_size, - .maxPerStageResources = max_descriptor_set_size, - .maxDescriptorSetSamplers = max_descriptor_set_size, - .maxDescriptorSetUniformBuffers = max_descriptor_set_size, - .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, - .maxDescriptorSetStorageBuffers = max_descriptor_set_size, - .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, - .maxDescriptorSetSampledImages = max_descriptor_set_size, - .maxDescriptorSetStorageImages = max_descriptor_set_size, - .maxDescriptorSetInputAttachments = max_descriptor_set_size, + .maxPerStageDescriptorSamplers = 64, + .maxPerStageDescriptorUniformBuffers = 64, + .maxPerStageDescriptorStorageBuffers = 64, + .maxPerStageDescriptorSampledImages = 64, + .maxPerStageDescriptorStorageImages = 64, + .maxPerStageDescriptorInputAttachments = 64, + .maxPerStageResources = 128, + .maxDescriptorSetSamplers = 256, + .maxDescriptorSetUniformBuffers = 256, + .maxDescriptorSetUniformBuffersDynamic = 256, + .maxDescriptorSetStorageBuffers = 256, + .maxDescriptorSetStorageBuffersDynamic = 256, + .maxDescriptorSetSampledImages = 256, + .maxDescriptorSetStorageImages = 256, + .maxDescriptorSetInputAttachments = 256, .maxVertexInputAttributes = 32, .maxVertexInputBindings = 32, .maxVertexInputAttributeOffset = 2047, .maxVertexInputBindingStride = 2048, .maxVertexOutputComponents = 128, - .maxTessellationGenerationLevel = 64, - .maxTessellationPatchSize = 32, - .maxTessellationControlPerVertexInputComponents = 128, - .maxTessellationControlPerVertexOutputComponents = 128, - .maxTessellationControlPerPatchOutputComponents = 120, - .maxTessellationControlTotalOutputComponents = 4096, - .maxTessellationEvaluationInputComponents = 128, - .maxTessellationEvaluationOutputComponents = 128, - .maxGeometryShaderInvocations = 127, + .maxTessellationGenerationLevel = 0, + .maxTessellationPatchSize = 0, + .maxTessellationControlPerVertexInputComponents = 0, + .maxTessellationControlPerVertexOutputComponents = 0, + .maxTessellationControlPerPatchOutputComponents = 0, + .maxTessellationControlTotalOutputComponents = 0, + .maxTessellationEvaluationInputComponents = 0, + .maxTessellationEvaluationOutputComponents = 0, + .maxGeometryShaderInvocations = 32, .maxGeometryInputComponents = 64, .maxGeometryOutputComponents = 128, .maxGeometryOutputVertices = 256, .maxGeometryTotalOutputComponents = 1024, .maxFragmentInputComponents = 
128, .maxFragmentOutputAttachments = 8, - .maxFragmentDualSrcAttachments = 1, + .maxFragmentDualSrcAttachments = 2, .maxFragmentCombinedOutputResources = 8, .maxComputeSharedMemorySize = 32768, .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, - .maxComputeWorkGroupInvocations = 2048, + .maxComputeWorkGroupInvocations = 16 * 1024, .maxComputeWorkGroupSize = { - 2048, - 2048, - 2048 + 16 * 1024/*devinfo->max_cs_threads*/, + 16 * 1024, + 16 * 1024 }, .subPixelPrecisionBits = 4 /* FIXME */, .subTexelPrecisionBits = 4 /* FIXME */, @@ -633,13 +449,13 @@ void radv_GetPhysicalDeviceProperties( .minTexelBufferOffsetAlignment = 1, .minUniformBufferOffsetAlignment = 4, .minStorageBufferOffsetAlignment = 4, - .minTexelOffset = -32, - .maxTexelOffset = 31, - .minTexelGatherOffset = -32, - .maxTexelGatherOffset = 31, - .minInterpolationOffset = -2, - .maxInterpolationOffset = 2, - .subPixelInterpolationOffsetBits = 8, + .minTexelOffset = -8, + .maxTexelOffset = 7, + .minTexelGatherOffset = -8, + .maxTexelGatherOffset = 7, + .minInterpolationOffset = 0, /* FIXME */ + .maxInterpolationOffset = 0, /* FIXME */ + .subPixelInterpolationOffsetBits = 0, /* FIXME */ .maxFramebufferWidth = (1 << 14), .maxFramebufferHeight = (1 << 14), .maxFramebufferLayers = (1 << 10), @@ -655,7 +471,7 @@ void radv_GetPhysicalDeviceProperties( .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, .maxSampleMaskWords = 1, .timestampComputeAndGraphics = false, - .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq, + .timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq, .maxClipDistances = 8, .maxCullDistances = 8, .maxCombinedClipAndCullDistances = 8, @@ -672,89 +488,17 @@ void radv_GetPhysicalDeviceProperties( }; *pProperties = (VkPhysicalDeviceProperties) { - .apiVersion = VK_MAKE_VERSION(1, 0, 42), - .driverVersion = radv_get_driver_version(), + .apiVersion = VK_MAKE_VERSION(1, 0, 5), + .driverVersion = 1, .vendorID = 0x1002, .deviceID = pdevice->rad_info.pci_id, - .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU, .limits = limits, - .sparseProperties = {0}, + .sparseProperties = {0}, /* Broadwell doesn't do sparse. 
*/ }; strcpy(pProperties->deviceName, pdevice->name); - memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE); -} - -void radv_GetPhysicalDeviceProperties2KHR( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties2KHR *pProperties) -{ - radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties); - - vk_foreach_struct(ext, pProperties->pNext) { - switch (ext->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: { - VkPhysicalDevicePushDescriptorPropertiesKHR *properties = - (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext; - properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; - break; - } - default: - break; - } - } -} - -static void radv_get_physical_device_queue_family_properties( - struct radv_physical_device* pdevice, - uint32_t* pCount, - VkQueueFamilyProperties** pQueueFamilyProperties) -{ - int num_queue_families = 1; - int idx; - if (pdevice->rad_info.compute_rings > 0 && - pdevice->rad_info.chip_class >= CIK && - !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) - num_queue_families++; - - if (pQueueFamilyProperties == NULL) { - *pCount = num_queue_families; - return; - } - - if (!*pCount) - return; - - idx = 0; - if (*pCount >= 1) { - *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) { - .queueFlags = VK_QUEUE_GRAPHICS_BIT | - VK_QUEUE_COMPUTE_BIT | - VK_QUEUE_TRANSFER_BIT | - VK_QUEUE_SPARSE_BINDING_BIT, - .queueCount = 1, - .timestampValidBits = 64, - .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, - }; - idx++; - } - - if (pdevice->rad_info.compute_rings > 0 && - pdevice->rad_info.chip_class >= CIK && - !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) { - if (*pCount > idx) { - *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) { - .queueFlags = VK_QUEUE_COMPUTE_BIT | - VK_QUEUE_TRANSFER_BIT | - VK_QUEUE_SPARSE_BINDING_BIT, - .queueCount = pdevice->rad_info.compute_rings, - .timestampValidBits = 64, - .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, - }; - idx++; - } - } - *pCount = idx; + radv_device_get_cache_uuid(pProperties->pipelineCacheUUID); } void radv_GetPhysicalDeviceQueueFamilyProperties( @@ -762,110 +506,62 @@ void radv_GetPhysicalDeviceQueueFamilyProperties( uint32_t* pCount, VkQueueFamilyProperties* pQueueFamilyProperties) { - RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); - if (!pQueueFamilyProperties) { - return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL); + if (pQueueFamilyProperties == NULL) { + *pCount = 1; return; } - VkQueueFamilyProperties *properties[] = { - pQueueFamilyProperties + 0, - pQueueFamilyProperties + 1, - pQueueFamilyProperties + 2, - }; - radv_get_physical_device_queue_family_properties(pdevice, pCount, properties); - assert(*pCount <= 3); -} + assert(*pCount >= 1); -void radv_GetPhysicalDeviceQueueFamilyProperties2KHR( - VkPhysicalDevice physicalDevice, - uint32_t* pCount, - VkQueueFamilyProperties2KHR *pQueueFamilyProperties) -{ - RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); - if (!pQueueFamilyProperties) { - return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL); - return; - } - VkQueueFamilyProperties *properties[] = { - &pQueueFamilyProperties[0].queueFamilyProperties, - &pQueueFamilyProperties[1].queueFamilyProperties, - &pQueueFamilyProperties[2].queueFamilyProperties, + *pQueueFamilyProperties = (VkQueueFamilyProperties) { + .queueFlags = VK_QUEUE_GRAPHICS_BIT | + VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_TRANSFER_BIT, + 
.queueCount = 1, + .timestampValidBits = 64, + .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, }; - radv_get_physical_device_queue_family_properties(pdevice, pCount, properties); - assert(*pCount <= 3); } void radv_GetPhysicalDeviceMemoryProperties( VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties *pMemoryProperties) + VkPhysicalDeviceMemoryProperties* pMemoryProperties) { RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice); - STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES); - - pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT; - pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) { + pMemoryProperties->memoryTypeCount = 3; + pMemoryProperties->memoryTypes[0] = (VkMemoryType) { .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - .heapIndex = RADV_MEM_HEAP_VRAM, - }; - pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) { - .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - .heapIndex = RADV_MEM_HEAP_GTT, + .heapIndex = 0, }; - pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) { + pMemoryProperties->memoryTypes[1] = (VkMemoryType) { .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS, + .heapIndex = 0, }; - pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) { - .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + pMemoryProperties->memoryTypes[2] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT| VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - .heapIndex = RADV_MEM_HEAP_GTT, + .heapIndex = 1, }; - STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS); - uint64_t visible_vram_size = MIN2(physical_device->rad_info.vram_size, - physical_device->rad_info.visible_vram_size); - - pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT; - pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) { - .size = physical_device->rad_info.vram_size - - visible_vram_size, - .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, - }; - pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) { - .size = visible_vram_size, + pMemoryProperties->memoryHeapCount = 2; + pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) { + .size = physical_device->rad_info.vram_size, .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, }; - pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) { + pMemoryProperties->memoryHeaps[1] = (VkMemoryHeap) { .size = physical_device->rad_info.gart_size, .flags = 0, }; } -void radv_GetPhysicalDeviceMemoryProperties2KHR( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties) -{ - return radv_GetPhysicalDeviceMemoryProperties(physicalDevice, - &pMemoryProperties->memoryProperties); -} - -static int -radv_queue_init(struct radv_device *device, struct radv_queue *queue, - int queue_family_index, int idx) +static VkResult +radv_queue_init(struct radv_device *device, struct radv_queue *queue) { queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC; queue->device = device; - queue->queue_family_index = queue_family_index; - queue->queue_idx = idx; - - queue->hw_ctx = device->ws->ctx_create(device->ws); - if (!queue->hw_ctx) - return VK_ERROR_OUT_OF_HOST_MEMORY; return VK_SUCCESS; } @@ -873,58 +569,6 @@ radv_queue_init(struct radv_device *device, 
struct radv_queue *queue, static void radv_queue_finish(struct radv_queue *queue) { - if (queue->hw_ctx) - queue->device->ws->ctx_destroy(queue->hw_ctx); - - if (queue->initial_preamble_cs) - queue->device->ws->cs_destroy(queue->initial_preamble_cs); - if (queue->continue_preamble_cs) - queue->device->ws->cs_destroy(queue->continue_preamble_cs); - if (queue->descriptor_bo) - queue->device->ws->buffer_destroy(queue->descriptor_bo); - if (queue->scratch_bo) - queue->device->ws->buffer_destroy(queue->scratch_bo); - if (queue->esgs_ring_bo) - queue->device->ws->buffer_destroy(queue->esgs_ring_bo); - if (queue->gsvs_ring_bo) - queue->device->ws->buffer_destroy(queue->gsvs_ring_bo); - if (queue->tess_factor_ring_bo) - queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo); - if (queue->tess_offchip_ring_bo) - queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo); - if (queue->compute_scratch_bo) - queue->device->ws->buffer_destroy(queue->compute_scratch_bo); -} - -static void -radv_device_init_gs_info(struct radv_device *device) -{ - switch (device->physical_device->rad_info.family) { - case CHIP_OLAND: - case CHIP_HAINAN: - case CHIP_KAVERI: - case CHIP_KABINI: - case CHIP_MULLINS: - case CHIP_ICELAND: - case CHIP_CARRIZO: - case CHIP_STONEY: - device->gs_table_depth = 16; - return; - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - device->gs_table_depth = 32; - return; - default: - unreachable("unknown GPU"); - } } VkResult radv_CreateDevice( @@ -938,9 +582,15 @@ VkResult radv_CreateDevice( struct radv_device *device; for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { - if (!is_extension_enabled(physical_device->extensions.ext_array, - physical_device->extensions.num_ext, - pCreateInfo->ppEnabledExtensionNames[i])) + bool found = false; + for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + device_extensions[j].extensionName) == 0) { + found = true; + break; + } + } + if (!found) return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); } @@ -950,13 +600,8 @@ VkResult radv_CreateDevice( if (!device) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - memset(device, 0, sizeof(*device)); - device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; device->instance = physical_device->instance; - device->physical_device = physical_device; - - device->debug_flags = device->instance->debug_flags; device->ws = physical_device->ws; if (pAllocator) @@ -964,156 +609,34 @@ VkResult radv_CreateDevice( else device->alloc = physical_device->instance->alloc; - for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { - const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i]; - uint32_t qfi = queue_create->queueFamilyIndex; - - device->queues[qfi] = vk_alloc(&device->alloc, - queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (!device->queues[qfi]) { - result = VK_ERROR_OUT_OF_HOST_MEMORY; - goto fail; - } - - memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue)); - - device->queue_count[qfi] = queue_create->queueCount; - - for (unsigned q = 0; q < queue_create->queueCount; q++) { - result = radv_queue_init(device, &device->queues[qfi][q], qfi, q); - if (result != VK_SUCCESS) - goto fail; - } + device->hw_ctx = device->ws->ctx_create(device->ws); + if 
(!device->hw_ctx) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail_free; } -#if HAVE_LLVM < 0x0400 - device->llvm_supports_spill = false; -#else - device->llvm_supports_spill = true; -#endif - - /* The maximum number of scratch waves. Scratch space isn't divided - * evenly between CUs. The number is only a function of the number of CUs. - * We can decrease the constant to decrease the scratch buffer size. - * - * sctx->scratch_waves must be >= the maximum posible size of - * 1 threadgroup, so that the hw doesn't hang from being unable - * to start any. - * - * The recommended value is 4 per CU at most. Higher numbers don't - * bring much benefit, but they still occupy chip resources (think - * async compute). I've seen ~2% performance difference between 4 and 32. - */ - uint32_t max_threads_per_block = 2048; - device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units, - max_threads_per_block / 64); - - radv_device_init_gs_info(device); - - device->tess_offchip_block_dw_size = - device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192; - device->has_distributed_tess = - device->physical_device->rad_info.chip_class >= VI && - device->physical_device->rad_info.max_se >= 2; + radv_queue_init(device, &device->queue); result = radv_device_init_meta(device); - if (result != VK_SUCCESS) - goto fail; - - radv_device_init_msaa(device); - - for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) { - device->empty_cs[family] = device->ws->cs_create(device->ws, family); - switch (family) { - case RADV_QUEUE_GENERAL: - radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); - radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1)); - radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1)); - break; - case RADV_QUEUE_COMPUTE: - radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0)); - radeon_emit(device->empty_cs[family], 0); - break; - } - device->ws->cs_finalize(device->empty_cs[family]); - - device->flush_cs[family] = device->ws->cs_create(device->ws, family); - switch (family) { - case RADV_QUEUE_GENERAL: - case RADV_QUEUE_COMPUTE: - si_cs_emit_cache_flush(device->flush_cs[family], - device->physical_device->rad_info.chip_class, - family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK, - RADV_CMD_FLAG_INV_ICACHE | - RADV_CMD_FLAG_INV_SMEM_L1 | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2); - break; - } - device->ws->cs_finalize(device->flush_cs[family]); - - device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family); - switch (family) { - case RADV_QUEUE_GENERAL: - case RADV_QUEUE_COMPUTE: - si_cs_emit_cache_flush(device->flush_shader_cs[family], - device->physical_device->rad_info.chip_class, - family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK, - family == RADV_QUEUE_COMPUTE ? 
RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) | - RADV_CMD_FLAG_INV_ICACHE | - RADV_CMD_FLAG_INV_SMEM_L1 | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2); - break; - } - device->ws->cs_finalize(device->flush_shader_cs[family]); - } - - if (getenv("RADV_TRACE_FILE")) { - device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8, - RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS); - if (!device->trace_bo) - goto fail; - - device->trace_id_ptr = device->ws->buffer_map(device->trace_bo); - if (!device->trace_id_ptr) - goto fail; + if (result != VK_SUCCESS) { + device->ws->ctx_destroy(device->hw_ctx); + goto fail_free; } + device->allow_fast_clears = env_var_as_boolean("RADV_FAST_CLEARS", false); + device->allow_dcc = !env_var_as_boolean("RADV_DCC_DISABLE", false); - if (device->physical_device->rad_info.chip_class >= CIK) - cik_create_gfx_config(device); - - VkPipelineCacheCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; - ci.pNext = NULL; - ci.flags = 0; - ci.pInitialData = NULL; - ci.initialDataSize = 0; - VkPipelineCache pc; - result = radv_CreatePipelineCache(radv_device_to_handle(device), - &ci, NULL, &pc); - if (result != VK_SUCCESS) - goto fail; - - device->mem_cache = radv_pipeline_cache_from_handle(pc); + if (device->allow_fast_clears && device->allow_dcc) + radv_finishme("DCC fast clears have not been tested\n"); + radv_device_init_msaa(device); + device->empty_cs = device->ws->cs_create(device->ws, RING_GFX); + radeon_emit(device->empty_cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); + radeon_emit(device->empty_cs, CONTEXT_CONTROL_LOAD_ENABLE(1)); + radeon_emit(device->empty_cs, CONTEXT_CONTROL_SHADOW_ENABLE(1)); + device->ws->cs_finalize(device->empty_cs); *pDevice = radv_device_to_handle(device); return VK_SUCCESS; - -fail: - if (device->trace_bo) - device->ws->buffer_destroy(device->trace_bo); - - if (device->gfx_init) - device->ws->buffer_destroy(device->gfx_init); - - for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { - for (unsigned q = 0; q < device->queue_count[i]; q++) - radv_queue_finish(&device->queues[i][q]); - if (device->queue_count[i]) - vk_free(&device->alloc, device->queues[i]); - } - +fail_free: vk_free(&device->alloc, device); return result; } @@ -1124,32 +647,10 @@ void radv_DestroyDevice( { RADV_FROM_HANDLE(radv_device, device, _device); - if (!device) - return; - - if (device->trace_bo) - device->ws->buffer_destroy(device->trace_bo); - - if (device->gfx_init) - device->ws->buffer_destroy(device->gfx_init); - - for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { - for (unsigned q = 0; q < device->queue_count[i]; q++) - radv_queue_finish(&device->queues[i][q]); - if (device->queue_count[i]) - vk_free(&device->alloc, device->queues[i]); - if (device->empty_cs[i]) - device->ws->cs_destroy(device->empty_cs[i]); - if (device->flush_cs[i]) - device->ws->cs_destroy(device->flush_cs[i]); - if (device->flush_shader_cs[i]) - device->ws->cs_destroy(device->flush_shader_cs[i]); - } + device->ws->ctx_destroy(device->hw_ctx); + radv_queue_finish(&device->queue); radv_device_finish_meta(device); - VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache); - radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL); - vk_free(&device->alloc, device); } @@ -1159,14 +660,14 @@ VkResult radv_EnumerateInstanceExtensionProperties( VkExtensionProperties* pProperties) { if (pProperties == NULL) { - *pPropertyCount = ARRAY_SIZE(instance_extensions); + *pPropertyCount = 
ARRAY_SIZE(global_extensions); return VK_SUCCESS; } - *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions)); - typed_memcpy(pProperties, instance_extensions, *pPropertyCount); + *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(global_extensions)); + typed_memcpy(pProperties, global_extensions, *pPropertyCount); - if (*pPropertyCount < ARRAY_SIZE(instance_extensions)) + if (*pPropertyCount < ARRAY_SIZE(global_extensions)) return VK_INCOMPLETE; return VK_SUCCESS; @@ -1178,17 +679,15 @@ VkResult radv_EnumerateDeviceExtensionProperties( uint32_t* pPropertyCount, VkExtensionProperties* pProperties) { - RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); - if (pProperties == NULL) { - *pPropertyCount = pdevice->extensions.num_ext; + *pPropertyCount = ARRAY_SIZE(device_extensions); return VK_SUCCESS; } - *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext); - typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount); + *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(device_extensions)); + typed_memcpy(pProperties, device_extensions, *pPropertyCount); - if (*pPropertyCount < pdevice->extensions.num_ext) + if (*pPropertyCount < ARRAY_SIZE(device_extensions)) return VK_INCOMPLETE; return VK_SUCCESS; @@ -1223,579 +722,15 @@ VkResult radv_EnumerateDeviceLayerProperties( void radv_GetDeviceQueue( VkDevice _device, - uint32_t queueFamilyIndex, + uint32_t queueNodeIndex, uint32_t queueIndex, VkQueue* pQueue) { RADV_FROM_HANDLE(radv_device, device, _device); - *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]); -} - -static void radv_dump_trace(struct radv_device *device, - struct radeon_winsys_cs *cs) -{ - const char *filename = getenv("RADV_TRACE_FILE"); - FILE *f = fopen(filename, "w"); - if (!f) { - fprintf(stderr, "Failed to write trace dump to %s\n", filename); - return; - } - - fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr); - device->ws->cs_dump(cs, f, *device->trace_id_ptr); - fclose(f); -} - -static void -fill_geom_tess_rings(struct radv_queue *queue, - uint32_t *map, - bool add_sample_positions, - uint32_t esgs_ring_size, - struct radeon_winsys_bo *esgs_ring_bo, - uint32_t gsvs_ring_size, - struct radeon_winsys_bo *gsvs_ring_bo, - uint32_t tess_factor_ring_size, - struct radeon_winsys_bo *tess_factor_ring_bo, - uint32_t tess_offchip_ring_size, - struct radeon_winsys_bo *tess_offchip_ring_bo) -{ - uint64_t esgs_va = 0, gsvs_va = 0; - uint64_t tess_factor_va = 0, tess_offchip_va = 0; - uint32_t *desc = &map[4]; - - if (esgs_ring_bo) - esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo); - if (gsvs_ring_bo) - gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo); - if (tess_factor_ring_bo) - tess_factor_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo); - if (tess_offchip_ring_bo) - tess_offchip_va = queue->device->ws->buffer_get_va(tess_offchip_ring_bo); - - /* stride 0, num records - size, add tid, swizzle, elsize4, - index stride 64 */ - desc[0] = esgs_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(true); - desc[2] = esgs_ring_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(1) | - S_008F0C_INDEX_STRIDE(3) | - S_008F0C_ADD_TID_ENABLE(true); - - desc += 4; - 
/* GS entry for ES->GS ring */ - /* stride 0, num records - size, elsize0, - index stride 0 */ - desc[0] = esgs_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)| - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); - desc[2] = esgs_ring_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(0) | - S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); - - desc += 4; - /* VS entry for GS->VS ring */ - /* stride 0, num records - size, elsize0, - index stride 0 */ - desc[0] = gsvs_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); - desc[2] = gsvs_ring_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(0) | - S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); - desc += 4; - - /* stride gsvs_itemsize, num records 64 - elsize 4, index stride 16 */ - /* shader will patch stride and desc[2] */ - desc[0] = gsvs_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(true); - desc[2] = 0; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(1) | - S_008F0C_INDEX_STRIDE(1) | - S_008F0C_ADD_TID_ENABLE(true); - desc += 4; - - desc[0] = tess_factor_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) | - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); - desc[2] = tess_factor_ring_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(0) | - S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); - desc += 4; - - desc[0] = tess_offchip_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) | - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); - desc[2] = tess_offchip_ring_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(0) | - S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); - desc += 4; - - /* add sample positions after all rings */ - memcpy(desc, queue->device->sample_locations_1x, 8); - desc += 2; - memcpy(desc, queue->device->sample_locations_2x, 16); - desc += 4; - memcpy(desc, queue->device->sample_locations_4x, 32); - desc += 8; - memcpy(desc, queue->device->sample_locations_8x, 64); - desc += 16; - memcpy(desc, queue->device->sample_locations_16x, 128); -} - -static unsigned -radv_get_hs_offchip_param(struct radv_device 
*device, uint32_t *max_offchip_buffers_p) -{ - bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK && - device->physical_device->rad_info.family != CHIP_CARRIZO && - device->physical_device->rad_info.family != CHIP_STONEY; - unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64; - unsigned max_offchip_buffers = max_offchip_buffers_per_se * - device->physical_device->rad_info.max_se; - unsigned offchip_granularity; - unsigned hs_offchip_param; - switch (device->tess_offchip_block_dw_size) { - default: - assert(0); - /* fall through */ - case 8192: - offchip_granularity = V_03093C_X_8K_DWORDS; - break; - case 4096: - offchip_granularity = V_03093C_X_4K_DWORDS; - break; - } - - switch (device->physical_device->rad_info.chip_class) { - case SI: - max_offchip_buffers = MIN2(max_offchip_buffers, 126); - break; - case CIK: - max_offchip_buffers = MIN2(max_offchip_buffers, 508); - break; - case VI: - default: - max_offchip_buffers = MIN2(max_offchip_buffers, 512); - break; - } - - *max_offchip_buffers_p = max_offchip_buffers; - if (device->physical_device->rad_info.chip_class >= CIK) { - if (device->physical_device->rad_info.chip_class >= VI) - --max_offchip_buffers; - hs_offchip_param = - S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) | - S_03093C_OFFCHIP_GRANULARITY(offchip_granularity); - } else { - hs_offchip_param = - S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers); - } - return hs_offchip_param; -} - -static VkResult -radv_get_preamble_cs(struct radv_queue *queue, - uint32_t scratch_size, - uint32_t compute_scratch_size, - uint32_t esgs_ring_size, - uint32_t gsvs_ring_size, - bool needs_tess_rings, - bool needs_sample_positions, - struct radeon_winsys_cs **initial_preamble_cs, - struct radeon_winsys_cs **continue_preamble_cs) -{ - struct radeon_winsys_bo *scratch_bo = NULL; - struct radeon_winsys_bo *descriptor_bo = NULL; - struct radeon_winsys_bo *compute_scratch_bo = NULL; - struct radeon_winsys_bo *esgs_ring_bo = NULL; - struct radeon_winsys_bo *gsvs_ring_bo = NULL; - struct radeon_winsys_bo *tess_factor_ring_bo = NULL; - struct radeon_winsys_bo *tess_offchip_ring_bo = NULL; - struct radeon_winsys_cs *dest_cs[2] = {0}; - bool add_tess_rings = false, add_sample_positions = false; - unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0; - unsigned max_offchip_buffers; - unsigned hs_offchip_param = 0; - if (!queue->has_tess_rings) { - if (needs_tess_rings) - add_tess_rings = true; - } - if (!queue->has_sample_positions) { - if (needs_sample_positions) - add_sample_positions = true; - } - tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se; - hs_offchip_param = radv_get_hs_offchip_param(queue->device, - &max_offchip_buffers); - tess_offchip_ring_size = max_offchip_buffers * - queue->device->tess_offchip_block_dw_size * 4; - - if (scratch_size <= queue->scratch_size && - compute_scratch_size <= queue->compute_scratch_size && - esgs_ring_size <= queue->esgs_ring_size && - gsvs_ring_size <= queue->gsvs_ring_size && - !add_tess_rings && !add_sample_positions && - queue->initial_preamble_cs) { - *initial_preamble_cs = queue->initial_preamble_cs; - *continue_preamble_cs = queue->continue_preamble_cs; - if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) - *continue_preamble_cs = NULL; - return VK_SUCCESS; - } - - if (scratch_size > queue->scratch_size) { - scratch_bo = queue->device->ws->buffer_create(queue->device->ws, - scratch_size, - 4096, - RADEON_DOMAIN_VRAM, - 
RADEON_FLAG_NO_CPU_ACCESS); - if (!scratch_bo) - goto fail; - } else - scratch_bo = queue->scratch_bo; - - if (compute_scratch_size > queue->compute_scratch_size) { - compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws, - compute_scratch_size, - 4096, - RADEON_DOMAIN_VRAM, - RADEON_FLAG_NO_CPU_ACCESS); - if (!compute_scratch_bo) - goto fail; - - } else - compute_scratch_bo = queue->compute_scratch_bo; - - if (esgs_ring_size > queue->esgs_ring_size) { - esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws, - esgs_ring_size, - 4096, - RADEON_DOMAIN_VRAM, - RADEON_FLAG_NO_CPU_ACCESS); - if (!esgs_ring_bo) - goto fail; - } else { - esgs_ring_bo = queue->esgs_ring_bo; - esgs_ring_size = queue->esgs_ring_size; - } - - if (gsvs_ring_size > queue->gsvs_ring_size) { - gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws, - gsvs_ring_size, - 4096, - RADEON_DOMAIN_VRAM, - RADEON_FLAG_NO_CPU_ACCESS); - if (!gsvs_ring_bo) - goto fail; - } else { - gsvs_ring_bo = queue->gsvs_ring_bo; - gsvs_ring_size = queue->gsvs_ring_size; - } - - if (add_tess_rings) { - tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws, - tess_factor_ring_size, - 256, - RADEON_DOMAIN_VRAM, - RADEON_FLAG_NO_CPU_ACCESS); - if (!tess_factor_ring_bo) - goto fail; - tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws, - tess_offchip_ring_size, - 256, - RADEON_DOMAIN_VRAM, - RADEON_FLAG_NO_CPU_ACCESS); - if (!tess_offchip_ring_bo) - goto fail; - } else { - tess_factor_ring_bo = queue->tess_factor_ring_bo; - tess_offchip_ring_bo = queue->tess_offchip_ring_bo; - } - - if (scratch_bo != queue->scratch_bo || - esgs_ring_bo != queue->esgs_ring_bo || - gsvs_ring_bo != queue->gsvs_ring_bo || - tess_factor_ring_bo != queue->tess_factor_ring_bo || - tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) { - uint32_t size = 0; - if (gsvs_ring_bo || esgs_ring_bo || - tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) { - size = 112; /* 2 dword + 2 padding + 4 dword * 6 */ - if (add_sample_positions) - size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. */ - } - else if (scratch_bo) - size = 8; /* 2 dword */ - - descriptor_bo = queue->device->ws->buffer_create(queue->device->ws, - size, - 4096, - RADEON_DOMAIN_VRAM, - RADEON_FLAG_CPU_ACCESS); - if (!descriptor_bo) - goto fail; - } else - descriptor_bo = queue->descriptor_bo; - - for(int i = 0; i < 2; ++i) { - struct radeon_winsys_cs *cs = NULL; - cs = queue->device->ws->cs_create(queue->device->ws, - queue->queue_family_index ? 
RING_COMPUTE : RING_GFX); - if (!cs) - goto fail; - - dest_cs[i] = cs; - - if (scratch_bo) - queue->device->ws->cs_add_buffer(cs, scratch_bo, 8); - - if (esgs_ring_bo) - queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8); - - if (gsvs_ring_bo) - queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8); - - if (tess_factor_ring_bo) - queue->device->ws->cs_add_buffer(cs, tess_factor_ring_bo, 8); - - if (tess_offchip_ring_bo) - queue->device->ws->cs_add_buffer(cs, tess_offchip_ring_bo, 8); - - if (descriptor_bo) - queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8); - - if (descriptor_bo != queue->descriptor_bo) { - uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo); - - if (scratch_bo) { - uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo); - uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | - S_008F04_SWIZZLE_ENABLE(1); - map[0] = scratch_va; - map[1] = rsrc1; - } - - if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo || - add_sample_positions) - fill_geom_tess_rings(queue, map, add_sample_positions, - esgs_ring_size, esgs_ring_bo, - gsvs_ring_size, gsvs_ring_bo, - tess_factor_ring_size, tess_factor_ring_bo, - tess_offchip_ring_size, tess_offchip_ring_bo); - - queue->device->ws->buffer_unmap(descriptor_bo); - } - - if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) { - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); - } - - if (esgs_ring_bo || gsvs_ring_bo) { - if (queue->device->physical_device->rad_info.chip_class >= CIK) { - radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2); - radeon_emit(cs, esgs_ring_size >> 8); - radeon_emit(cs, gsvs_ring_size >> 8); - } else { - radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2); - radeon_emit(cs, esgs_ring_size >> 8); - radeon_emit(cs, gsvs_ring_size >> 8); - } - } - - if (tess_factor_ring_bo) { - uint64_t tf_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo); - if (queue->device->physical_device->rad_info.chip_class >= CIK) { - radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, - S_030938_SIZE(tess_factor_ring_size / 4)); - radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, - tf_va >> 8); - radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param); - } else { - radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, - S_008988_SIZE(tess_factor_ring_size / 4)); - radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, - tf_va >> 8); - radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, - hs_offchip_param); - } - } - - if (descriptor_bo) { - uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, - R_00B130_SPI_SHADER_USER_DATA_VS_0, - R_00B230_SPI_SHADER_USER_DATA_GS_0, - R_00B330_SPI_SHADER_USER_DATA_ES_0, - R_00B430_SPI_SHADER_USER_DATA_HS_0, - R_00B530_SPI_SHADER_USER_DATA_LS_0}; - - uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo); - - for (int i = 0; i < ARRAY_SIZE(regs); ++i) { - radeon_set_sh_reg_seq(cs, regs[i], 2); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - } - } - - if (compute_scratch_bo) { - uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo); - uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | - S_008F04_SWIZZLE_ENABLE(1); - - queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8); - - radeon_set_sh_reg_seq(cs, 
R_00B900_COMPUTE_USER_DATA_0, 2); - radeon_emit(cs, scratch_va); - radeon_emit(cs, rsrc1); - } - - if (!i) { - si_cs_emit_cache_flush(cs, - queue->device->physical_device->rad_info.chip_class, - queue->queue_family_index == RING_COMPUTE && - queue->device->physical_device->rad_info.chip_class >= CIK, - RADV_CMD_FLAG_INV_ICACHE | - RADV_CMD_FLAG_INV_SMEM_L1 | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2); - } - - if (!queue->device->ws->cs_finalize(cs)) - goto fail; - } - - if (queue->initial_preamble_cs) - queue->device->ws->cs_destroy(queue->initial_preamble_cs); - - if (queue->continue_preamble_cs) - queue->device->ws->cs_destroy(queue->continue_preamble_cs); - - queue->initial_preamble_cs = dest_cs[0]; - queue->continue_preamble_cs = dest_cs[1]; - - if (scratch_bo != queue->scratch_bo) { - if (queue->scratch_bo) - queue->device->ws->buffer_destroy(queue->scratch_bo); - queue->scratch_bo = scratch_bo; - queue->scratch_size = scratch_size; - } - - if (compute_scratch_bo != queue->compute_scratch_bo) { - if (queue->compute_scratch_bo) - queue->device->ws->buffer_destroy(queue->compute_scratch_bo); - queue->compute_scratch_bo = compute_scratch_bo; - queue->compute_scratch_size = compute_scratch_size; - } - - if (esgs_ring_bo != queue->esgs_ring_bo) { - if (queue->esgs_ring_bo) - queue->device->ws->buffer_destroy(queue->esgs_ring_bo); - queue->esgs_ring_bo = esgs_ring_bo; - queue->esgs_ring_size = esgs_ring_size; - } - - if (gsvs_ring_bo != queue->gsvs_ring_bo) { - if (queue->gsvs_ring_bo) - queue->device->ws->buffer_destroy(queue->gsvs_ring_bo); - queue->gsvs_ring_bo = gsvs_ring_bo; - queue->gsvs_ring_size = gsvs_ring_size; - } - - if (tess_factor_ring_bo != queue->tess_factor_ring_bo) { - queue->tess_factor_ring_bo = tess_factor_ring_bo; - } - - if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) { - queue->tess_offchip_ring_bo = tess_offchip_ring_bo; - queue->has_tess_rings = true; - } - - if (descriptor_bo != queue->descriptor_bo) { - if (queue->descriptor_bo) - queue->device->ws->buffer_destroy(queue->descriptor_bo); - - queue->descriptor_bo = descriptor_bo; - } - - if (add_sample_positions) - queue->has_sample_positions = true; + assert(queueIndex == 0); - *initial_preamble_cs = queue->initial_preamble_cs; - *continue_preamble_cs = queue->continue_preamble_cs; - if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) - *continue_preamble_cs = NULL; - return VK_SUCCESS; -fail: - for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i) - if (dest_cs[i]) - queue->device->ws->cs_destroy(dest_cs[i]); - if (descriptor_bo && descriptor_bo != queue->descriptor_bo) - queue->device->ws->buffer_destroy(descriptor_bo); - if (scratch_bo && scratch_bo != queue->scratch_bo) - queue->device->ws->buffer_destroy(scratch_bo); - if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo) - queue->device->ws->buffer_destroy(compute_scratch_bo); - if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo) - queue->device->ws->buffer_destroy(esgs_ring_bo); - if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo) - queue->device->ws->buffer_destroy(gsvs_ring_bo); - if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo) - queue->device->ws->buffer_destroy(tess_factor_ring_bo); - if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo) - queue->device->ws->buffer_destroy(tess_offchip_ring_bo); - return VK_ERROR_OUT_OF_DEVICE_MEMORY; + *pQueue = radv_queue_to_handle(&device->queue); } VkResult radv_QueueSubmit( @@ 
-1807,129 +742,40 @@ VkResult radv_QueueSubmit( RADV_FROM_HANDLE(radv_queue, queue, _queue); RADV_FROM_HANDLE(radv_fence, fence, _fence); struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL; - struct radeon_winsys_ctx *ctx = queue->hw_ctx; + struct radeon_winsys_ctx *ctx = queue->device->hw_ctx; int ret; - uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX; - uint32_t scratch_size = 0; - uint32_t compute_scratch_size = 0; - uint32_t esgs_ring_size = 0, gsvs_ring_size = 0; - struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL; - VkResult result; - bool fence_emitted = false; - bool tess_rings_needed = false; - bool sample_positions_needed = false; - - /* Do this first so failing to allocate scratch buffers can't result in - * partially executed submissions. */ - for (uint32_t i = 0; i < submitCount; i++) { - for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, - pSubmits[i].pCommandBuffers[j]); - - scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed); - compute_scratch_size = MAX2(compute_scratch_size, - cmd_buffer->compute_scratch_size_needed); - esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed); - gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed); - tess_rings_needed |= cmd_buffer->tess_rings_needed; - sample_positions_needed |= cmd_buffer->sample_positions_needed; - } - } - - result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, - esgs_ring_size, gsvs_ring_size, tess_rings_needed, - sample_positions_needed, - &initial_preamble_cs, &continue_preamble_cs); - if (result != VK_SUCCESS) - return result; for (uint32_t i = 0; i < submitCount; i++) { struct radeon_winsys_cs **cs_array; - bool do_flush = !i || pSubmits[i].pWaitDstStageMask; - bool can_patch = !do_flush; - uint32_t advance; - - if (!pSubmits[i].commandBufferCount) { - if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) { - ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, - &queue->device->empty_cs[queue->queue_family_index], - 1, NULL, NULL, - (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, - pSubmits[i].waitSemaphoreCount, - (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, - pSubmits[i].signalSemaphoreCount, - false, base_fence); - if (ret) { - radv_loge("failed to submit CS %d\n", i); - abort(); - } - fence_emitted = true; - } + bool can_patch = true; + + if (!pSubmits[i].commandBufferCount) continue; - } cs_array = malloc(sizeof(struct radeon_winsys_cs *) * - (pSubmits[i].commandBufferCount + do_flush)); - - if(do_flush) - cs_array[0] = pSubmits[i].waitSemaphoreCount ? 
- queue->device->flush_shader_cs[queue->queue_family_index] : - queue->device->flush_cs[queue->queue_family_index]; + pSubmits[i].commandBufferCount); for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pSubmits[i].pCommandBuffers[j]); assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - cs_array[j + do_flush] = cmd_buffer->cs; + cs_array[j] = cmd_buffer->cs; if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) can_patch = false; } - - for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) { - advance = MIN2(max_cs_submission, - pSubmits[i].commandBufferCount + do_flush - j); - bool b = j == 0; - bool e = j + advance == pSubmits[i].commandBufferCount + do_flush; - - if (queue->device->trace_bo) - *queue->device->trace_id_ptr = 0; - - ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, - advance, initial_preamble_cs, continue_preamble_cs, - (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, - b ? pSubmits[i].waitSemaphoreCount : 0, - (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, - e ? pSubmits[i].signalSemaphoreCount : 0, - can_patch, base_fence); - - if (ret) { - radv_loge("failed to submit CS %d\n", i); - abort(); - } - fence_emitted = true; - if (queue->device->trace_bo) { - bool success = queue->device->ws->ctx_wait_idle( - queue->hw_ctx, - radv_queue_family_to_ring( - queue->queue_family_index), - queue->queue_idx); - - if (!success) { /* Hang */ - radv_dump_trace(queue->device, cs_array[j]); - abort(); - } - } - } + ret = queue->device->ws->cs_submit(ctx, cs_array, + pSubmits[i].commandBufferCount, + can_patch, base_fence); + if (ret) + radv_loge("failed to submit CS %d\n", i); free(cs_array); } if (fence) { - if (!fence_emitted) - ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, - &queue->device->empty_cs[queue->queue_family_index], - 1, NULL, NULL, NULL, 0, NULL, 0, - false, base_fence); + if (!submitCount) + ret = queue->device->ws->cs_submit(ctx, &queue->device->empty_cs, + 1, false, base_fence); fence->submitted = true; } @@ -1942,9 +788,7 @@ VkResult radv_QueueWaitIdle( { RADV_FROM_HANDLE(radv_queue, queue, _queue); - queue->device->ws->ctx_wait_idle(queue->hw_ctx, - radv_queue_family_to_ring(queue->queue_family_index), - queue->queue_idx); + queue->device->ws->ctx_wait_idle(queue->device->hw_ctx); return VK_SUCCESS; } @@ -1953,11 +797,7 @@ VkResult radv_DeviceWaitIdle( { RADV_FROM_HANDLE(radv_device, device, _device); - for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { - for (unsigned q = 0; q < device->queue_count[i]; q++) { - radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q])); - } - } + device->ws->ctx_wait_idle(device->hw_ctx); return VK_SUCCESS; } @@ -1991,21 +831,6 @@ PFN_vkVoidFunction radv_GetDeviceProcAddr( return radv_lookup_entrypoint(pName); } -bool radv_get_memory_fd(struct radv_device *device, - struct radv_device_memory *memory, - int *pFD) -{ - struct radeon_bo_metadata metadata; - - if (memory->image) { - radv_init_metadata(device, memory->image, &metadata); - device->ws->buffer_set_metadata(memory->bo, &metadata); - } - - return device->ws->buffer_get_fd(device->ws, memory->bo, - pFD); -} - VkResult radv_AllocateMemory( VkDevice _device, const VkMemoryAllocateInfo* pAllocateInfo, @@ -2017,7 +842,6 @@ VkResult radv_AllocateMemory( VkResult result; enum radeon_bo_domain domain; uint32_t flags = 0; - const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info = NULL; 
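/* Editor's note: a minimal usage sketch, not part of the diff, of the
 * VK_NV_dedicated_allocation pNext chain that the dedicate_info
 * declaration above and the vk_foreach_struct() walk just below (both on
 * the removed side of this hunk) are looking for. The handle and
 * variable names here are illustrative only:
 *
 *   VkDedicatedAllocationMemoryAllocateInfoNV dedicated = {
 *       .sType = VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV,
 *       .image = image,            // or .buffer, but not both
 *       .buffer = VK_NULL_HANDLE,
 *   };
 *   VkMemoryAllocateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 *       .pNext = &dedicated,
 *       .allocationSize = reqs.size,
 *       .memoryTypeIndex = type_index,
 *   };
 *   vkAllocateMemory(device, &info, NULL, &mem);
 */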
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); if (pAllocateInfo->allocationSize == 0) { @@ -2026,45 +850,22 @@ VkResult radv_AllocateMemory( return VK_SUCCESS; } - vk_foreach_struct(ext, pAllocateInfo->pNext) { - switch (ext->sType) { - case VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV: - dedicate_info = (const VkDedicatedAllocationMemoryAllocateInfoNV *)ext; - break; - default: - break; - } - } - mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (mem == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - if (dedicate_info) { - mem->image = radv_image_from_handle(dedicate_info->image); - mem->buffer = radv_buffer_from_handle(dedicate_info->buffer); - } else { - mem->image = NULL; - mem->buffer = NULL; - } - uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); - if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE || - pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED) + if (pAllocateInfo->memoryTypeIndex == 2) domain = RADEON_DOMAIN_GTT; else domain = RADEON_DOMAIN_VRAM; - if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM) + if (pAllocateInfo->memoryTypeIndex == 0) flags |= RADEON_FLAG_NO_CPU_ACCESS; else flags |= RADEON_FLAG_CPU_ACCESS; - - if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE) - flags |= RADEON_FLAG_GTT_WC; - - mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536, + mem->bo = device->ws->buffer_create(device->ws, alloc_size, 32768, domain, flags); if (!mem->bo) { @@ -2161,14 +962,19 @@ void radv_GetBufferMemoryRequirements( { RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); - pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1; - - if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) - pMemoryRequirements->alignment = 4096; - else - pMemoryRequirements->alignment = 16; + /* The Vulkan spec (git aaed022) says: + * + * memoryTypeBits is a bitfield and contains one bit set for every + * supported memory type for the resource. The bit `1<<i` is set if and + * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties + * structure for the physical device is supported. + * + * We support exactly three memory types. + */ + pMemoryRequirements->memoryTypeBits = 0x7; - pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment); + pMemoryRequirements->size = buffer->size; + pMemoryRequirements->alignment = 16; } void radv_GetImageMemoryRequirements( @@ -2178,7 +984,16 @@ { RADV_FROM_HANDLE(radv_image, image, _image); - pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1; + /* The Vulkan spec (git aaed022) says: + * + * memoryTypeBits is a bitfield and contains one bit set for every + * supported memory type for the resource. The bit `1<<i` is set if and + * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties + * structure for the physical device is supported. + * + * We support exactly three memory types.
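 *
 * Editor's note, not part of the diff: given memoryTypeBits, a client
 * picks a compatible type with the usual Vulkan selection loop; the
 * names reqs, props and wanted below are illustrative:
 *
 *   VkPhysicalDeviceMemoryProperties props;
 *   vkGetPhysicalDeviceMemoryProperties(physical_device, &props);
 *   for (uint32_t i = 0; i < props.memoryTypeCount; i++) {
 *       if ((reqs.memoryTypeBits & (1u << i)) &&
 *           (props.memoryTypes[i].propertyFlags & wanted) == wanted)
 *           return i;   // first type that is both allowed and suitable
 *   }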
+ */ + pMemoryRequirements->memoryTypeBits = 0x7; pMemoryRequirements->size = image->size; pMemoryRequirements->alignment = image->alignment; @@ -2241,89 +1056,13 @@ VkResult radv_BindImageMemory( return VK_SUCCESS; } - -static void -radv_sparse_buffer_bind_memory(struct radv_device *device, - const VkSparseBufferMemoryBindInfo *bind) -{ - RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer); - - for (uint32_t i = 0; i < bind->bindCount; ++i) { - struct radv_device_memory *mem = NULL; - - if (bind->pBinds[i].memory != VK_NULL_HANDLE) - mem = radv_device_memory_from_handle(bind->pBinds[i].memory); - - device->ws->buffer_virtual_bind(buffer->bo, - bind->pBinds[i].resourceOffset, - bind->pBinds[i].size, - mem ? mem->bo : NULL, - bind->pBinds[i].memoryOffset); - } -} - -static void -radv_sparse_image_opaque_bind_memory(struct radv_device *device, - const VkSparseImageOpaqueMemoryBindInfo *bind) -{ - RADV_FROM_HANDLE(radv_image, image, bind->image); - - for (uint32_t i = 0; i < bind->bindCount; ++i) { - struct radv_device_memory *mem = NULL; - - if (bind->pBinds[i].memory != VK_NULL_HANDLE) - mem = radv_device_memory_from_handle(bind->pBinds[i].memory); - - device->ws->buffer_virtual_bind(image->bo, - bind->pBinds[i].resourceOffset, - bind->pBinds[i].size, - mem ? mem->bo : NULL, - bind->pBinds[i].memoryOffset); - } -} - - VkResult radv_QueueBindSparse( - VkQueue _queue, +VkResult radv_QueueBindSparse( + VkQueue queue, uint32_t bindInfoCount, const VkBindSparseInfo* pBindInfo, - VkFence _fence) + VkFence fence) { - RADV_FROM_HANDLE(radv_fence, fence, _fence); - RADV_FROM_HANDLE(radv_queue, queue, _queue); - struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL; - bool fence_emitted = false; - - for (uint32_t i = 0; i < bindInfoCount; ++i) { - for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) { - radv_sparse_buffer_bind_memory(queue->device, - pBindInfo[i].pBufferBinds + j); - } - - for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) { - radv_sparse_image_opaque_bind_memory(queue->device, - pBindInfo[i].pImageOpaqueBinds + j); - } - - if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) { - queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx, - &queue->device->empty_cs[queue->queue_family_index], - 1, NULL, NULL, - (struct radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores, - pBindInfo[i].waitSemaphoreCount, - (struct radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores, - pBindInfo[i].signalSemaphoreCount, - false, base_fence); - fence_emitted = true; - if (fence) - fence->submitted = true; - } - } - - if (fence && !fence_emitted) { - fence->signalled = true; - } - - return VK_SUCCESS; + stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); } VkResult radv_CreateFence( @@ -2344,10 +1083,7 @@ VkResult radv_CreateFence( fence->submitted = false; fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT); fence->fence = device->ws->create_fence(); - if (!fence->fence) { - vk_free2(&device->alloc, pAllocator, fence); - return VK_ERROR_OUT_OF_HOST_MEMORY; - } + *pFence = radv_fence_to_handle(fence); @@ -2447,33 +1183,25 @@ VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence) // Queue semaphore functions VkResult radv_CreateSemaphore( - VkDevice _device, + VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSemaphore* pSemaphore) { - RADV_FROM_HANDLE(radv_device, device, _device); - struct radeon_winsys_sem *sem; + /* The DRM execbuffer ioctl always executes in-order, even 
between different + * rings. As such, there's nothing to do for the user space semaphore. + */ - sem = device->ws->create_sem(device->ws); - if (!sem) - return VK_ERROR_OUT_OF_HOST_MEMORY; + *pSemaphore = (VkSemaphore)1; - *pSemaphore = radeon_winsys_sem_to_handle(sem); return VK_SUCCESS; } void radv_DestroySemaphore( - VkDevice _device, - VkSemaphore _semaphore, + VkDevice device, + VkSemaphore semaphore, const VkAllocationCallbacks* pAllocator) { - RADV_FROM_HANDLE(radv_device, device, _device); - RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore); - if (!_semaphore) - return; - - device->ws->destroy_sem(sem); } VkResult radv_CreateEvent( @@ -2570,17 +1298,6 @@ VkResult radv_CreateBuffer( buffer->usage = pCreateInfo->usage; buffer->bo = NULL; buffer->offset = 0; - buffer->flags = pCreateInfo->flags; - - if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) { - buffer->bo = device->ws->buffer_create(device->ws, - align64(buffer->size, 4096), - 4096, 0, RADEON_FLAG_VIRTUAL); - if (!buffer->bo) { - vk_free2(&device->alloc, pAllocator, buffer); - return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - } - } *pBuffer = radv_buffer_to_handle(buffer); @@ -2598,9 +1315,6 @@ void radv_DestroyBuffer( if (!buffer) return; - if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) - device->ws->buffer_destroy(buffer->bo); - vk_free2(&device->alloc, pAllocator, buffer); } @@ -2613,11 +1327,6 @@ si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil) return image->surface.tiling_index[level]; } -static uint32_t radv_surface_layer_count(struct radv_image_view *iview) -{ - return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count; -} - static void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb, @@ -2649,9 +1358,8 @@ radv_initialise_color_surface(struct radv_device *device, va += iview->image->dcc_offset; cb->cb_dcc_base = va >> 8; - uint32_t max_slice = radv_surface_layer_count(iview); cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) | - S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1); + S_028C6C_SLICE_MAX(iview->base_layer + iview->extent.depth - 1); cb->micro_tile_mode = iview->image->surface.micro_tile_mode; pitch_tile_max = level_info->nblk_x / 8 - 1; @@ -2674,14 +1382,14 @@ radv_initialise_color_surface(struct radv_device *device, if (iview->image->fmask.size) { va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset; - if (device->physical_device->rad_info.chip_class >= CIK) + if (device->instance->physicalDevice.rad_info.chip_class >= CIK) cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1); cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index); cb->cb_color_fmask = va >> 8; cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max); } else { /* This must be set for fast clear to work without FMASK. 
*/ - if (device->physical_device->rad_info.chip_class >= CIK) + if (device->instance->physicalDevice.rad_info.chip_class >= CIK) cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); cb->cb_color_fmask = cb->cb_color_base; @@ -2734,14 +1442,13 @@ radv_initialise_color_surface(struct radv_device *device, if (iview->image->fmask.size) cb->cb_color_info |= S_028C70_COMPRESSION(1); - if (iview->image->cmask.size && - !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)) + if (iview->image->cmask.size && device->allow_fast_clears) cb->cb_color_info |= S_028C70_FAST_CLEAR(1); if (iview->image->surface.dcc_size && level_info->dcc_enabled) cb->cb_color_info |= S_028C70_DCC_ENABLE(1); - if (device->physical_device->rad_info.chip_class >= VI) { + if (device->instance->physicalDevice.rad_info.chip_class >= VI) { unsigned max_uncompressed_block_size = 2; if (iview->image->samples > 1) { if (iview->image->surface.bpe == 1) @@ -2756,7 +1463,7 @@ radv_initialise_color_surface(struct radv_device *device, /* This must be set for fast clear to work without FMASK. */ if (!iview->image->fmask.size && - device->physical_device->rad_info.chip_class == SI) { + device->instance->physicalDevice.rad_info.chip_class == SI) { unsigned bankh = util_logbase2(iview->image->surface.bankh); cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); } @@ -2771,7 +1478,6 @@ radv_initialise_ds_surface(struct radv_device *device, unsigned format; uint64_t va, s_offs, z_offs; const struct radeon_surf_level *level_info = &iview->image->surface.level[level]; - bool stencil_only = false; memset(ds, 0, sizeof(*ds)); switch (iview->vk_format) { case VK_FORMAT_D24_UNORM_S8_UINT: @@ -2790,24 +1496,22 @@ radv_initialise_ds_surface(struct radv_device *device, S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); ds->offset_scale = 1.0f; break; - case VK_FORMAT_S8_UINT: - stencil_only = true; - level_info = &iview->image->surface.stencil_level[level]; - break; default: break; } format = radv_translate_dbformat(iview->vk_format); + if (format == V_028040_Z_INVALID) { + fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format); + } va = device->ws->buffer_get_va(iview->bo) + iview->image->offset; s_offs = z_offs = va; z_offs += iview->image->surface.level[level].offset; s_offs += iview->image->surface.stencil_level[level].offset; - uint32_t max_slice = radv_surface_layer_count(iview); ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) | - S_028008_SLICE_MAX(iview->base_layer + max_slice - 1); + S_028008_SLICE_MAX(iview->base_layer + iview->extent.depth - 1); ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1); @@ -2819,8 +1523,8 @@ radv_initialise_ds_surface(struct radv_device *device, else ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID); - if (device->physical_device->rad_info.chip_class >= CIK) { - struct radeon_info *info = &device->physical_device->rad_info; + if (device->instance->physicalDevice.rad_info.chip_class >= CIK) { + struct radeon_info *info = &device->instance->physicalDevice.rad_info; unsigned tiling_index = iview->image->surface.tiling_index[level]; unsigned stencil_index = iview->image->surface.stencil_tiling_index[level]; unsigned macro_index = iview->image->surface.macro_tile_index; @@ -2828,9 +1532,6 @@ radv_initialise_ds_surface(struct radv_device *device, unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index]; unsigned 
macro_mode = info->cik_macrotile_mode_array[macro_index]; - if (stencil_only) - tile_mode = stencil_tile_mode; - ds->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) | S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) | @@ -2845,11 +1546,9 @@ radv_initialise_ds_surface(struct radv_device *device, ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); tile_mode_index = si_tile_mode_index(iview->image, level, true); ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); - if (stencil_only) - ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); } - if (iview->image->surface.htile_size && !level) { + if (iview->image->htile.size && !level) { ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) | S_028040_ALLOW_EXPCLEAR(1); @@ -2872,7 +1571,7 @@ radv_initialise_ds_surface(struct radv_device *device, ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1); va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + - iview->image->htile_offset; + iview->image->htile.offset; ds->db_htile_data_base = va >> 8; ds->db_htile_surface = S_028ABC_FULL_CACHE(1); } else { @@ -2907,9 +1606,6 @@ VkResult radv_CreateFramebuffer( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); framebuffer->attachment_count = pCreateInfo->attachmentCount; - framebuffer->width = pCreateInfo->width; - framebuffer->height = pCreateInfo->height; - framebuffer->layers = pCreateInfo->layers; for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { VkImageView _iview = pCreateInfo->pAttachments[i]; struct radv_image_view *iview = radv_image_view_from_handle(_iview); @@ -2919,11 +1615,12 @@ VkResult radv_CreateFramebuffer( } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview); } - framebuffer->width = MIN2(framebuffer->width, iview->extent.width); - framebuffer->height = MIN2(framebuffer->height, iview->extent.height); - framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview)); } + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; + *pFramebuffer = radv_framebuffer_to_handle(framebuffer); return VK_SUCCESS; } @@ -3057,7 +1754,14 @@ radv_init_sampler(struct radv_device *device, uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ? 
(uint32_t) pCreateInfo->maxAnisotropy : 0; uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso); - bool is_vi = (device->physical_device->rad_info.chip_class >= VI); + bool is_vi; + is_vi = (device->instance->physicalDevice.rad_info.chip_class >= VI); + + if (!is_vi && max_aniso > 0) { + radv_finishme("Anisotropic filtering must be disabled manually " + "by the shader on SI-CI when BASE_LEVEL == LAST_LEVEL\n"); + max_aniso = max_aniso_ratio = 0; + } sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) | S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) | @@ -3076,7 +1780,7 @@ radv_init_sampler(struct radv_device *device, S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) | S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) | S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) | - S_008F38_MIP_POINT_PRECLAMP(0) | + S_008F38_MIP_POINT_PRECLAMP(1) | S_008F38_DISABLE_LSB_CEIL(1) | S_008F38_FILTER_PREC_FIX(1) | S_008F38_ANISO_OVERRIDE(is_vi)); @@ -3118,48 +1822,3 @@ void radv_DestroySampler( return; vk_free2(&device->alloc, pAllocator, sampler); } - - -/* vk_icd.h does not declare this function, so we declare it here to - * suppress Wmissing-prototypes. - */ -PUBLIC VKAPI_ATTR VkResult VKAPI_CALL -vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion); - -PUBLIC VKAPI_ATTR VkResult VKAPI_CALL -vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion) -{ - /* For the full details on loader interface versioning, see - * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>. - * What follows is a condensed summary, to help you navigate the large and - * confusing official doc. - * - * - Loader interface v0 is incompatible with later versions. We don't - * support it. - * - * - In loader interface v1: - * - The first ICD entrypoint called by the loader is - * vk_icdGetInstanceProcAddr(). The ICD must statically expose this - * entrypoint. - * - The ICD must statically expose no other Vulkan symbol unless it is - * linked with -Bsymbolic. - * - Each dispatchable Vulkan handle created by the ICD must be - * a pointer to a struct whose first member is VK_LOADER_DATA. The - * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC. - * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and - * vkDestroySurfaceKHR(). The ICD must be capable of working with - * such loader-managed surfaces. - * - * - Loader interface v2 differs from v1 in: - * - The first ICD entrypoint called by the loader is - * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must - * statically expose this entrypoint. - * - * - Loader interface v3 differs from v2 in: - * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(), - * vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR, - * because the loader no longer does so. 
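 *
 * Editor's note, not part of the diff: an illustrative negotiation as
 * seen from the loader side; the value 4 stands in for whatever newest
 * interface version the loader happens to support:
 *
 *   uint32_t version = 4;
 *   vk_icdNegotiateLoaderICDInterfaceVersion(&version);
 *   // this ICD clamps the result: version == MIN2(4, 3) == 3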
- */ - *pSupportedVersion = MIN2(*pSupportedVersion, 3u); - return VK_SUCCESS; -} diff --git a/lib/mesa/src/amd/vulkan/radv_entrypoints.c b/lib/mesa/src/amd/vulkan/radv_entrypoints.c index 3f86ea1df..4ee949234 100644 --- a/lib/mesa/src/amd/vulkan/radv_entrypoints.c +++ b/lib/mesa/src/amd/vulkan/radv_entrypoints.c @@ -39,20 +39,20 @@ static const char strings[] = "vkCreateInstance\0" "vkDestroyInstance\0" "vkEnumeratePhysicalDevices\0" - "vkGetDeviceProcAddr\0" - "vkGetInstanceProcAddr\0" - "vkGetPhysicalDeviceProperties\0" - "vkGetPhysicalDeviceQueueFamilyProperties\0" - "vkGetPhysicalDeviceMemoryProperties\0" "vkGetPhysicalDeviceFeatures\0" "vkGetPhysicalDeviceFormatProperties\0" "vkGetPhysicalDeviceImageFormatProperties\0" + "vkGetPhysicalDeviceProperties\0" + "vkGetPhysicalDeviceQueueFamilyProperties\0" + "vkGetPhysicalDeviceMemoryProperties\0" + "vkGetInstanceProcAddr\0" + "vkGetDeviceProcAddr\0" "vkCreateDevice\0" "vkDestroyDevice\0" - "vkEnumerateInstanceLayerProperties\0" "vkEnumerateInstanceExtensionProperties\0" - "vkEnumerateDeviceLayerProperties\0" "vkEnumerateDeviceExtensionProperties\0" + "vkEnumerateInstanceLayerProperties\0" + "vkEnumerateDeviceLayerProperties\0" "vkGetDeviceQueue\0" "vkQueueSubmit\0" "vkQueueWaitIdle\0" @@ -64,10 +64,10 @@ static const char strings[] = "vkFlushMappedMemoryRanges\0" "vkInvalidateMappedMemoryRanges\0" "vkGetDeviceMemoryCommitment\0" - "vkGetBufferMemoryRequirements\0" "vkBindBufferMemory\0" - "vkGetImageMemoryRequirements\0" "vkBindImageMemory\0" + "vkGetBufferMemoryRequirements\0" + "vkGetImageMemoryRequirements\0" "vkGetImageSparseMemoryRequirements\0" "vkGetPhysicalDeviceSparseImageFormatProperties\0" "vkQueueBindSparse\0" @@ -183,47 +183,48 @@ static const char strings[] = "vkGetSwapchainImagesKHR\0" "vkAcquireNextImageKHR\0" "vkQueuePresentKHR\0" - "vkCreateWaylandSurfaceKHR\0" - "vkGetPhysicalDeviceWaylandPresentationSupportKHR\0" + "vkGetPhysicalDeviceDisplayPropertiesKHR\0" + "vkGetPhysicalDeviceDisplayPlanePropertiesKHR\0" + "vkGetDisplayPlaneSupportedDisplaysKHR\0" + "vkGetDisplayModePropertiesKHR\0" + "vkCreateDisplayModeKHR\0" + "vkGetDisplayPlaneCapabilitiesKHR\0" + "vkCreateDisplayPlaneSurfaceKHR\0" + "vkCreateSharedSwapchainsKHR\0" "vkCreateXlibSurfaceKHR\0" "vkGetPhysicalDeviceXlibPresentationSupportKHR\0" "vkCreateXcbSurfaceKHR\0" "vkGetPhysicalDeviceXcbPresentationSupportKHR\0" - "vkCmdDrawIndirectCountAMD\0" - "vkCmdDrawIndexedIndirectCountAMD\0" - "vkGetPhysicalDeviceFeatures2KHR\0" - "vkGetPhysicalDeviceProperties2KHR\0" - "vkGetPhysicalDeviceFormatProperties2KHR\0" - "vkGetPhysicalDeviceImageFormatProperties2KHR\0" - "vkGetPhysicalDeviceQueueFamilyProperties2KHR\0" - "vkGetPhysicalDeviceMemoryProperties2KHR\0" - "vkGetPhysicalDeviceSparseImageFormatProperties2KHR\0" - "vkCmdPushDescriptorSetKHR\0" - "vkTrimCommandPoolKHR\0" - "vkCreateDescriptorUpdateTemplateKHR\0" - "vkDestroyDescriptorUpdateTemplateKHR\0" - "vkUpdateDescriptorSetWithTemplateKHR\0" - "vkCmdPushDescriptorSetWithTemplateKHR\0" + "vkCreateWaylandSurfaceKHR\0" + "vkGetPhysicalDeviceWaylandPresentationSupportKHR\0" + "vkCreateMirSurfaceKHR\0" + "vkGetPhysicalDeviceMirPresentationSupportKHR\0" + "vkCreateAndroidSurfaceKHR\0" + "vkCreateWin32SurfaceKHR\0" + "vkGetPhysicalDeviceWin32PresentationSupportKHR\0" + "vkCreateDebugReportCallbackEXT\0" + "vkDestroyDebugReportCallbackEXT\0" + "vkDebugReportMessageEXT\0" ; static const struct radv_entrypoint entrypoints[] = { { 0, 0x38a581a6 }, { 17, 0x9bd21af2 }, { 35, 0x5787c327 }, - { 62, 0xba013486 }, - { 82, 
0x3d2ae9ad }, - { 104, 0x52fe22c9 }, - { 134, 0x4e5fc88a }, - { 175, 0xa90da4da }, - { 211, 0x113e2f33 }, - { 239, 0x3e54b398 }, - { 275, 0xdd36a867 }, + { 62, 0x113e2f33 }, + { 90, 0x3e54b398 }, + { 126, 0xdd36a867 }, + { 167, 0x52fe22c9 }, + { 197, 0x4e5fc88a }, + { 238, 0xa90da4da }, + { 274, 0x3d2ae9ad }, + { 296, 0xba013486 }, { 316, 0x085ed23f }, { 331, 0x1fbcc9cb }, - { 347, 0x081f69d8 }, - { 382, 0xeb27627e }, - { 421, 0x2f8566e7 }, - { 454, 0x5fd13eed }, + { 347, 0xeb27627e }, + { 386, 0x5fd13eed }, + { 423, 0x081f69d8 }, + { 458, 0x2f8566e7 }, { 491, 0xcc920d9a }, { 508, 0xfa4713ec }, { 522, 0x6f8fc2a5 }, @@ -235,10 +236,10 @@ static const struct radv_entrypoint entrypoints[] = { { 611, 0xff52f051 }, { 637, 0x1e115cca }, { 668, 0x46e38db5 }, - { 696, 0xab98422a }, - { 726, 0x06bcbdcb }, - { 745, 0x916f1e63 }, - { 774, 0x5caaae4a }, + { 696, 0x06bcbdcb }, + { 715, 0x5caaae4a }, + { 733, 0xab98422a }, + { 763, 0x916f1e63 }, { 792, 0x15855f5b }, { 827, 0x272ef8ef }, { 874, 0xc3628a09 }, @@ -354,27 +355,28 @@ static const struct radv_entrypoint entrypoints[] = { { 3126, 0x57695f28 }, { 3150, 0xc3fedb2e }, { 3172, 0xfc5fb6ce }, - { 3190, 0x2b2a4b79 }, - { 3216, 0x84e085ac }, - { 3265, 0xa693bc66 }, - { 3288, 0x34a063ab }, - { 3334, 0xc5e5b106 }, - { 3356, 0x41782cb9 }, - { 3401, 0xe5ad0a50 }, - { 3427, 0xc86e9287 }, - { 3460, 0x6a9a3636 }, - { 3492, 0xcd15838c }, - { 3526, 0x9099cbbb }, - { 3566, 0x102ff7ea }, - { 3611, 0x5ceb2bed }, - { 3656, 0xc8c3da3d }, - { 3696, 0x8746ed72 }, - { 3747, 0xf17232a1 }, - { 3773, 0x51177c8d }, - { 3794, 0x5189488a }, - { 3830, 0xaa83901e }, - { 3867, 0x214ad230 }, - { 3904, 0x3d528981 }, + { 3190, 0x0fa0cd2e }, + { 3230, 0xb9b8ddba }, + { 3275, 0xabef4889 }, + { 3313, 0x36b8a8de }, + { 3343, 0xcc0bde41 }, + { 3366, 0x4b60d48c }, + { 3399, 0x7ac4dacb }, + { 3430, 0x47655c4a }, + { 3458, 0xa693bc66 }, + { 3481, 0x34a063ab }, + { 3527, 0xc5e5b106 }, + { 3549, 0x41782cb9 }, + { 3594, 0x2b2a4b79 }, + { 3620, 0x84e085ac }, + { 3669, 0x2ce93a55 }, + { 3691, 0xcf1e6028 }, + { 3736, 0x03667f4e }, + { 3762, 0xfa2ba1e2 }, + { 3786, 0x80e72505 }, + { 3833, 0x0987ef56 }, + { 3864, 0x43d4c4e2 }, + { 3896, 0xa4e75334 }, }; @@ -387,20 +389,20 @@ static const struct radv_entrypoint entrypoints[] = { VkResult radv_CreateInstance(const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkInstance* pInstance) __attribute__ ((weak)); void radv_DestroyInstance(VkInstance instance, const VkAllocationCallbacks* pAllocator) __attribute__ ((weak)); VkResult radv_EnumeratePhysicalDevices(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices) __attribute__ ((weak)); -PFN_vkVoidFunction radv_GetDeviceProcAddr(VkDevice device, const char* pName) __attribute__ ((weak)); -PFN_vkVoidFunction radv_GetInstanceProcAddr(VkInstance instance, const char* pName) __attribute__ ((weak)); -void radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties) __attribute__ ((weak)); -void radv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties* pQueueFamilyProperties) __attribute__ ((weak)); -void radv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties) __attribute__ ((weak)); void radv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures) __attribute__ ((weak)); void 
radv_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties) __attribute__ ((weak)); VkResult radv_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties) __attribute__ ((weak)); +void radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties) __attribute__ ((weak)); +void radv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties* pQueueFamilyProperties) __attribute__ ((weak)); +void radv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties) __attribute__ ((weak)); +PFN_vkVoidFunction radv_GetInstanceProcAddr(VkInstance instance, const char* pName) __attribute__ ((weak)); +PFN_vkVoidFunction radv_GetDeviceProcAddr(VkDevice device, const char* pName) __attribute__ ((weak)); VkResult radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) __attribute__ ((weak)); void radv_DestroyDevice(VkDevice device, const VkAllocationCallbacks* pAllocator) __attribute__ ((weak)); -VkResult radv_EnumerateInstanceLayerProperties(uint32_t* pPropertyCount, VkLayerProperties* pProperties) __attribute__ ((weak)); VkResult radv_EnumerateInstanceExtensionProperties(const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties) __attribute__ ((weak)); -VkResult radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkLayerProperties* pProperties) __attribute__ ((weak)); VkResult radv_EnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties) __attribute__ ((weak)); +VkResult radv_EnumerateInstanceLayerProperties(uint32_t* pPropertyCount, VkLayerProperties* pProperties) __attribute__ ((weak)); +VkResult radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkLayerProperties* pProperties) __attribute__ ((weak)); void radv_GetDeviceQueue(VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue* pQueue) __attribute__ ((weak)); VkResult radv_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) __attribute__ ((weak)); VkResult radv_QueueWaitIdle(VkQueue queue) __attribute__ ((weak)); @@ -412,10 +414,10 @@ void radv_UnmapMemory(VkDevice device, VkDeviceMemory memory) __attribute__ ((we VkResult radv_FlushMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges) __attribute__ ((weak)); VkResult radv_InvalidateMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges) __attribute__ ((weak)); void radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory, VkDeviceSize* pCommittedMemoryInBytes) __attribute__ ((weak)); -void radv_GetBufferMemoryRequirements(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements) __attribute__ ((weak)); VkResult radv_BindBufferMemory(VkDevice device, VkBuffer buffer, VkDeviceMemory memory, VkDeviceSize memoryOffset) __attribute__ ((weak)); -void radv_GetImageMemoryRequirements(VkDevice device, VkImage 
image, VkMemoryRequirements* pMemoryRequirements) __attribute__ ((weak)); VkResult radv_BindImageMemory(VkDevice device, VkImage image, VkDeviceMemory memory, VkDeviceSize memoryOffset) __attribute__ ((weak)); +void radv_GetBufferMemoryRequirements(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements) __attribute__ ((weak)); +void radv_GetImageMemoryRequirements(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements) __attribute__ ((weak)); void radv_GetImageSparseMemoryRequirements(VkDevice device, VkImage image, uint32_t* pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements* pSparseMemoryRequirements) __attribute__ ((weak)); void radv_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t* pPropertyCount, VkSparseImageFormatProperties* pProperties) __attribute__ ((weak)); VkResult radv_QueueBindSparse(VkQueue queue, uint32_t bindInfoCount, const VkBindSparseInfo* pBindInfo, VkFence fence) __attribute__ ((weak)); @@ -494,14 +496,14 @@ void radv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t void radv_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) __attribute__ ((weak)); void radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride) __attribute__ ((weak)); void radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride) __attribute__ ((weak)); -void radv_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) __attribute__ ((weak)); +void radv_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z) __attribute__ ((weak)); void radv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset) __attribute__ ((weak)); void radv_CmdCopyBuffer(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferCopy* pRegions) __attribute__ ((weak)); void radv_CmdCopyImage(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageCopy* pRegions) __attribute__ ((weak)); void radv_CmdBlitImage(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkFilter filter) __attribute__ ((weak)); void radv_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions) __attribute__ ((weak)); void radv_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions) __attribute__ ((weak)); -void radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const void* pData) __attribute__ ((weak)); +void radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const uint32_t* pData) __attribute__ ((weak)); void 
radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize size, uint32_t data) __attribute__ ((weak)); void radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges) __attribute__ ((weak)); void radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges) __attribute__ ((weak)); @@ -531,12 +533,14 @@ void radv_DestroySwapchainKHR(VkDevice device, VkSwapchainKHR swapchain, const V VkResult radv_GetSwapchainImagesKHR(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pSwapchainImageCount, VkImage* pSwapchainImages) __attribute__ ((weak)); VkResult radv_AcquireNextImageKHR(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, VkFence fence, uint32_t* pImageIndex) __attribute__ ((weak)); VkResult radv_QueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* pPresentInfo) __attribute__ ((weak)); -#ifdef VK_USE_PLATFORM_WAYLAND_KHR -VkResult radv_CreateWaylandSurfaceKHR(VkInstance instance, const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) __attribute__ ((weak)); -#endif // VK_USE_PLATFORM_WAYLAND_KHR -#ifdef VK_USE_PLATFORM_WAYLAND_KHR -VkBool32 radv_GetPhysicalDeviceWaylandPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, struct wl_display* display) __attribute__ ((weak)); -#endif // VK_USE_PLATFORM_WAYLAND_KHR +VkResult radv_GetPhysicalDeviceDisplayPropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPropertiesKHR* pProperties) __attribute__ ((weak)); +VkResult radv_GetPhysicalDeviceDisplayPlanePropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPlanePropertiesKHR* pProperties) __attribute__ ((weak)); +VkResult radv_GetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physicalDevice, uint32_t planeIndex, uint32_t* pDisplayCount, VkDisplayKHR* pDisplays) __attribute__ ((weak)); +VkResult radv_GetDisplayModePropertiesKHR(VkPhysicalDevice physicalDevice, VkDisplayKHR display, uint32_t* pPropertyCount, VkDisplayModePropertiesKHR* pProperties) __attribute__ ((weak)); +VkResult radv_CreateDisplayModeKHR(VkPhysicalDevice physicalDevice, VkDisplayKHR display, const VkDisplayModeCreateInfoKHR*pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDisplayModeKHR* pMode) __attribute__ ((weak)); +VkResult radv_GetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physicalDevice, VkDisplayModeKHR mode, uint32_t planeIndex, VkDisplayPlaneCapabilitiesKHR* pCapabilities) __attribute__ ((weak)); +VkResult radv_CreateDisplayPlaneSurfaceKHR(VkInstance instance, const VkDisplaySurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) __attribute__ ((weak)); +VkResult radv_CreateSharedSwapchainsKHR(VkDevice device, uint32_t swapchainCount, const VkSwapchainCreateInfoKHR* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchains) __attribute__ ((weak)); #ifdef VK_USE_PLATFORM_XLIB_KHR VkResult radv_CreateXlibSurfaceKHR(VkInstance instance, const VkXlibSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) __attribute__ ((weak)); #endif // VK_USE_PLATFORM_XLIB_KHR @@ -549,40 +553,49 @@ VkResult 
radv_CreateXcbSurfaceKHR(VkInstance instance, const VkXcbSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) __attribute__ ((weak)); #endif // VK_USE_PLATFORM_XCB_KHR #ifdef VK_USE_PLATFORM_XCB_KHR VkBool32 radv_GetPhysicalDeviceXcbPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, xcb_connection_t* connection, xcb_visualid_t visual_id) __attribute__ ((weak)); #endif // VK_USE_PLATFORM_XCB_KHR -void radv_CmdDrawIndirectCountAMD(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride) __attribute__ ((weak)); -void radv_CmdDrawIndexedIndirectCountAMD(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride) __attribute__ ((weak)); -void radv_GetPhysicalDeviceFeatures2KHR(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures2KHR* pFeatures) __attribute__ ((weak)); -void radv_GetPhysicalDeviceProperties2KHR(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties2KHR* pProperties) __attribute__ ((weak)); -void radv_GetPhysicalDeviceFormatProperties2KHR(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties2KHR* pFormatProperties) __attribute__ ((weak)); -VkResult radv_GetPhysicalDeviceImageFormatProperties2KHR(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceImageFormatInfo2KHR* pImageFormatInfo, VkImageFormatProperties2KHR* pImageFormatProperties) __attribute__ ((weak)); -void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties2KHR* pQueueFamilyProperties) __attribute__ ((weak)); -void radv_GetPhysicalDeviceMemoryProperties2KHR(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties2KHR* pMemoryProperties) __attribute__ ((weak)); -void radv_GetPhysicalDeviceSparseImageFormatProperties2KHR(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSparseImageFormatInfo2KHR* pFormatInfo, uint32_t* pPropertyCount, VkSparseImageFormatProperties2KHR* pProperties) __attribute__ ((weak)); -void radv_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t set, uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites) __attribute__ ((weak)); -void radv_TrimCommandPoolKHR(VkDevice device, VkCommandPool commandPool, VkCommandPoolTrimFlagsKHR flags) __attribute__ ((weak)); -VkResult radv_CreateDescriptorUpdateTemplateKHR(VkDevice device, const VkDescriptorUpdateTemplateCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDescriptorUpdateTemplateKHR* pDescriptorUpdateTemplate) __attribute__ ((weak)); -void radv_DestroyDescriptorUpdateTemplateKHR(VkDevice device, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, const VkAllocationCallbacks* pAllocator) __attribute__ ((weak)); -void radv_UpdateDescriptorSetWithTemplateKHR(VkDevice device, VkDescriptorSet descriptorSet, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, const void* pData) __attribute__ ((weak)); -void radv_CmdPushDescriptorSetWithTemplateKHR(VkCommandBuffer commandBuffer, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, VkPipelineLayout layout, uint32_t set, const void* pData) __attribute__ ((weak)); +#ifdef VK_USE_PLATFORM_WAYLAND_KHR +VkResult radv_CreateWaylandSurfaceKHR(VkInstance instance, const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) __attribute__ ((weak)); +#endif // 
VK_USE_PLATFORM_WAYLAND_KHR +#ifdef VK_USE_PLATFORM_WAYLAND_KHR +VkBool32 radv_GetPhysicalDeviceWaylandPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, struct wl_display* display) __attribute__ ((weak)); +#endif // VK_USE_PLATFORM_WAYLAND_KHR +#ifdef VK_USE_PLATFORM_MIR_KHR +VkResult radv_CreateMirSurfaceKHR(VkInstance instance, const VkMirSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) __attribute__ ((weak)); +#endif // VK_USE_PLATFORM_MIR_KHR +#ifdef VK_USE_PLATFORM_MIR_KHR +VkBool32 radv_GetPhysicalDeviceMirPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, MirConnection* connection) __attribute__ ((weak)); +#endif // VK_USE_PLATFORM_MIR_KHR +#ifdef VK_USE_PLATFORM_ANDROID_KHR +VkResult radv_CreateAndroidSurfaceKHR(VkInstance instance, const VkAndroidSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) __attribute__ ((weak)); +#endif // VK_USE_PLATFORM_ANDROID_KHR +#ifdef VK_USE_PLATFORM_WIN32_KHR +VkResult radv_CreateWin32SurfaceKHR(VkInstance instance, const VkWin32SurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) __attribute__ ((weak)); +#endif // VK_USE_PLATFORM_WIN32_KHR +#ifdef VK_USE_PLATFORM_WIN32_KHR +VkBool32 radv_GetPhysicalDeviceWin32PresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex) __attribute__ ((weak)); +#endif // VK_USE_PLATFORM_WIN32_KHR +VkResult radv_CreateDebugReportCallbackEXT(VkInstance instance, const VkDebugReportCallbackCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugReportCallbackEXT* pCallback) __attribute__ ((weak)); +void radv_DestroyDebugReportCallbackEXT(VkInstance instance, VkDebugReportCallbackEXT callback, const VkAllocationCallbacks* pAllocator) __attribute__ ((weak)); +void radv_DebugReportMessageEXT(VkInstance instance, VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType, uint64_t object, size_t location, int32_t messageCode, const char* pLayerPrefix, const char* pMessage) __attribute__ ((weak)); const struct radv_dispatch_table radv_layer = { .CreateInstance = radv_CreateInstance, .DestroyInstance = radv_DestroyInstance, .EnumeratePhysicalDevices = radv_EnumeratePhysicalDevices, - .GetDeviceProcAddr = radv_GetDeviceProcAddr, - .GetInstanceProcAddr = radv_GetInstanceProcAddr, - .GetPhysicalDeviceProperties = radv_GetPhysicalDeviceProperties, - .GetPhysicalDeviceQueueFamilyProperties = radv_GetPhysicalDeviceQueueFamilyProperties, - .GetPhysicalDeviceMemoryProperties = radv_GetPhysicalDeviceMemoryProperties, .GetPhysicalDeviceFeatures = radv_GetPhysicalDeviceFeatures, .GetPhysicalDeviceFormatProperties = radv_GetPhysicalDeviceFormatProperties, .GetPhysicalDeviceImageFormatProperties = radv_GetPhysicalDeviceImageFormatProperties, + .GetPhysicalDeviceProperties = radv_GetPhysicalDeviceProperties, + .GetPhysicalDeviceQueueFamilyProperties = radv_GetPhysicalDeviceQueueFamilyProperties, + .GetPhysicalDeviceMemoryProperties = radv_GetPhysicalDeviceMemoryProperties, + .GetInstanceProcAddr = radv_GetInstanceProcAddr, + .GetDeviceProcAddr = radv_GetDeviceProcAddr, .CreateDevice = radv_CreateDevice, .DestroyDevice = radv_DestroyDevice, - .EnumerateInstanceLayerProperties = radv_EnumerateInstanceLayerProperties, .EnumerateInstanceExtensionProperties = radv_EnumerateInstanceExtensionProperties, - .EnumerateDeviceLayerProperties = radv_EnumerateDeviceLayerProperties, 
.EnumerateDeviceExtensionProperties = radv_EnumerateDeviceExtensionProperties, + .EnumerateInstanceLayerProperties = radv_EnumerateInstanceLayerProperties, + .EnumerateDeviceLayerProperties = radv_EnumerateDeviceLayerProperties, .GetDeviceQueue = radv_GetDeviceQueue, .QueueSubmit = radv_QueueSubmit, .QueueWaitIdle = radv_QueueWaitIdle, @@ -594,10 +607,10 @@ const struct radv_dispatch_table radv_layer = { .FlushMappedMemoryRanges = radv_FlushMappedMemoryRanges, .InvalidateMappedMemoryRanges = radv_InvalidateMappedMemoryRanges, .GetDeviceMemoryCommitment = radv_GetDeviceMemoryCommitment, - .GetBufferMemoryRequirements = radv_GetBufferMemoryRequirements, .BindBufferMemory = radv_BindBufferMemory, - .GetImageMemoryRequirements = radv_GetImageMemoryRequirements, .BindImageMemory = radv_BindImageMemory, + .GetBufferMemoryRequirements = radv_GetBufferMemoryRequirements, + .GetImageMemoryRequirements = radv_GetImageMemoryRequirements, .GetImageSparseMemoryRequirements = radv_GetImageSparseMemoryRequirements, .GetPhysicalDeviceSparseImageFormatProperties = radv_GetPhysicalDeviceSparseImageFormatProperties, .QueueBindSparse = radv_QueueBindSparse, @@ -713,12 +726,14 @@ const struct radv_dispatch_table radv_layer = { .GetSwapchainImagesKHR = radv_GetSwapchainImagesKHR, .AcquireNextImageKHR = radv_AcquireNextImageKHR, .QueuePresentKHR = radv_QueuePresentKHR, -#ifdef VK_USE_PLATFORM_WAYLAND_KHR - .CreateWaylandSurfaceKHR = radv_CreateWaylandSurfaceKHR, -#endif // VK_USE_PLATFORM_WAYLAND_KHR -#ifdef VK_USE_PLATFORM_WAYLAND_KHR - .GetPhysicalDeviceWaylandPresentationSupportKHR = radv_GetPhysicalDeviceWaylandPresentationSupportKHR, -#endif // VK_USE_PLATFORM_WAYLAND_KHR + .GetPhysicalDeviceDisplayPropertiesKHR = radv_GetPhysicalDeviceDisplayPropertiesKHR, + .GetPhysicalDeviceDisplayPlanePropertiesKHR = radv_GetPhysicalDeviceDisplayPlanePropertiesKHR, + .GetDisplayPlaneSupportedDisplaysKHR = radv_GetDisplayPlaneSupportedDisplaysKHR, + .GetDisplayModePropertiesKHR = radv_GetDisplayModePropertiesKHR, + .CreateDisplayModeKHR = radv_CreateDisplayModeKHR, + .GetDisplayPlaneCapabilitiesKHR = radv_GetDisplayPlaneCapabilitiesKHR, + .CreateDisplayPlaneSurfaceKHR = radv_CreateDisplayPlaneSurfaceKHR, + .CreateSharedSwapchainsKHR = radv_CreateSharedSwapchainsKHR, #ifdef VK_USE_PLATFORM_XLIB_KHR .CreateXlibSurfaceKHR = radv_CreateXlibSurfaceKHR, #endif // VK_USE_PLATFORM_XLIB_KHR @@ -731,26 +746,35 @@ const struct radv_dispatch_table radv_layer = { #ifdef VK_USE_PLATFORM_XCB_KHR .GetPhysicalDeviceXcbPresentationSupportKHR = radv_GetPhysicalDeviceXcbPresentationSupportKHR, #endif // VK_USE_PLATFORM_XCB_KHR - .CmdDrawIndirectCountAMD = radv_CmdDrawIndirectCountAMD, - .CmdDrawIndexedIndirectCountAMD = radv_CmdDrawIndexedIndirectCountAMD, - .GetPhysicalDeviceFeatures2KHR = radv_GetPhysicalDeviceFeatures2KHR, - .GetPhysicalDeviceProperties2KHR = radv_GetPhysicalDeviceProperties2KHR, - .GetPhysicalDeviceFormatProperties2KHR = radv_GetPhysicalDeviceFormatProperties2KHR, - .GetPhysicalDeviceImageFormatProperties2KHR = radv_GetPhysicalDeviceImageFormatProperties2KHR, - .GetPhysicalDeviceQueueFamilyProperties2KHR = radv_GetPhysicalDeviceQueueFamilyProperties2KHR, - .GetPhysicalDeviceMemoryProperties2KHR = radv_GetPhysicalDeviceMemoryProperties2KHR, - .GetPhysicalDeviceSparseImageFormatProperties2KHR = radv_GetPhysicalDeviceSparseImageFormatProperties2KHR, - .CmdPushDescriptorSetKHR = radv_CmdPushDescriptorSetKHR, - .TrimCommandPoolKHR = radv_TrimCommandPoolKHR, - .CreateDescriptorUpdateTemplateKHR = 
radv_CreateDescriptorUpdateTemplateKHR, - .DestroyDescriptorUpdateTemplateKHR = radv_DestroyDescriptorUpdateTemplateKHR, - .UpdateDescriptorSetWithTemplateKHR = radv_UpdateDescriptorSetWithTemplateKHR, - .CmdPushDescriptorSetWithTemplateKHR = radv_CmdPushDescriptorSetWithTemplateKHR, +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + .CreateWaylandSurfaceKHR = radv_CreateWaylandSurfaceKHR, +#endif // VK_USE_PLATFORM_WAYLAND_KHR +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + .GetPhysicalDeviceWaylandPresentationSupportKHR = radv_GetPhysicalDeviceWaylandPresentationSupportKHR, +#endif // VK_USE_PLATFORM_WAYLAND_KHR +#ifdef VK_USE_PLATFORM_MIR_KHR + .CreateMirSurfaceKHR = radv_CreateMirSurfaceKHR, +#endif // VK_USE_PLATFORM_MIR_KHR +#ifdef VK_USE_PLATFORM_MIR_KHR + .GetPhysicalDeviceMirPresentationSupportKHR = radv_GetPhysicalDeviceMirPresentationSupportKHR, +#endif // VK_USE_PLATFORM_MIR_KHR +#ifdef VK_USE_PLATFORM_ANDROID_KHR + .CreateAndroidSurfaceKHR = radv_CreateAndroidSurfaceKHR, +#endif // VK_USE_PLATFORM_ANDROID_KHR +#ifdef VK_USE_PLATFORM_WIN32_KHR + .CreateWin32SurfaceKHR = radv_CreateWin32SurfaceKHR, +#endif // VK_USE_PLATFORM_WIN32_KHR +#ifdef VK_USE_PLATFORM_WIN32_KHR + .GetPhysicalDeviceWin32PresentationSupportKHR = radv_GetPhysicalDeviceWin32PresentationSupportKHR, +#endif // VK_USE_PLATFORM_WIN32_KHR + .CreateDebugReportCallbackEXT = radv_CreateDebugReportCallbackEXT, + .DestroyDebugReportCallbackEXT = radv_DestroyDebugReportCallbackEXT, + .DebugReportMessageEXT = radv_DebugReportMessageEXT, }; -static void * __attribute__ ((noinline)) +void * __attribute__ ((noinline)) radv_resolve_entrypoint(uint32_t index) { return radv_layer.entrypoints[index]; @@ -759,51 +783,51 @@ radv_resolve_entrypoint(uint32_t index) /* Hash table stats: * size 256 entries * collisions entries - * 0 115 - * 1 29 + * 0 111 + * 1 30 * 2 10 - * 3 8 - * 4 4 + * 3 7 + * 4 5 * 5 1 - * 6 0 - * 7 0 + * 6 1 + * 7 1 * 8 1 - * 9+ 0 + * 9+ 2 */ #define none 0xffff static const uint16_t map[] = { - 0x0044, none, none, none, none, 0x002b, 0x0040, 0x0061, - 0x0049, 0x0022, 0x0056, none, none, none, none, none, - none, none, none, 0x0067, none, none, none, none, - 0x0052, 0x0097, 0x0058, 0x004c, none, 0x0069, 0x00a5, none, + 0x0044, none, none, none, 0x0096, 0x002b, 0x0040, 0x0061, + 0x0049, 0x0022, 0x0056, none, none, none, 0x0095, none, + none, none, none, 0x0067, none, none, none, 0x0099, + 0x0052, 0x009d, 0x0058, 0x004c, none, 0x0069, none, none, none, none, 0x0054, none, 0x0014, 0x005b, 0x0070, 0x0002, - 0x007c, none, 0x001c, 0x002f, none, none, 0x0077, 0x0018, - 0x004b, 0x002a, none, 0x0008, 0x0065, 0x0080, 0x006d, 0x0053, - none, 0x009f, 0x004d, 0x0090, 0x0024, 0x00a0, 0x005e, 0x000b, - 0x0088, 0x0091, none, 0x00a6, 0x005c, 0x0033, none, none, - 0x0087, 0x003f, 0x001f, 0x002c, 0x0082, 0x005a, none, none, - 0x0099, 0x0019, 0x0046, 0x003a, none, none, 0x0034, none, - 0x0051, none, none, 0x0020, 0x009b, 0x0066, 0x0075, none, - none, none, 0x0035, 0x001e, 0x006f, 0x0060, 0x0047, 0x000a, - 0x0023, none, none, 0x006b, none, 0x0041, 0x0028, none, - 0x0068, none, 0x00a1, 0x003e, 0x0048, 0x007b, 0x0055, none, - none, 0x0045, 0x006e, 0x0084, none, 0x0089, 0x000e, 0x0030, - none, 0x0027, 0x0081, none, 0x005d, 0x008a, 0x0003, 0x008f, - none, 0x0063, 0x0006, none, 0x0093, 0x00a3, none, none, + 0x007c, none, 0x001e, 0x002f, none, none, 0x0077, 0x0018, + 0x004b, 0x002a, none, 0x0003, 0x0065, 0x0080, 0x006d, 0x0053, + none, none, 0x004d, 0x0090, 0x0024, none, 0x005e, 0x000b, + 0x0088, 0x0091, none, none, 0x005c, 0x0033, none, 0x00a8, + 0x0087, 
0x003f, 0x001d, 0x002c, 0x0082, 0x005a, 0x00a2, none, + none, 0x0019, 0x0046, 0x003a, 0x0093, 0x00a1, 0x0034, none, + 0x0051, none, none, 0x0020, none, 0x0066, 0x0075, none, + none, 0x00a3, 0x0035, 0x001f, 0x006f, 0x0060, 0x0047, 0x0005, + 0x0023, 0x00a6, none, 0x006b, none, 0x0041, 0x0028, none, + 0x0068, none, none, 0x003e, 0x0048, 0x007b, 0x0055, 0x00a5, + none, 0x0045, 0x006e, 0x0084, none, 0x0089, 0x000d, 0x0030, + none, 0x0027, 0x0081, 0x009a, 0x005d, 0x008a, 0x000a, 0x008f, + none, 0x0063, 0x0007, none, 0x0098, 0x0097, none, none, none, 0x0059, 0x0026, none, 0x003c, none, 0x0037, none, - 0x0009, 0x0038, 0x0011, none, 0x0072, 0x0016, none, 0x003d, - none, 0x006a, 0x003b, none, 0x004a, 0x0013, 0x0000, 0x007a, - 0x002e, 0x0071, none, 0x0096, 0x0074, 0x0004, 0x004f, 0x0029, - 0x00a4, 0x004e, 0x0095, 0x0031, 0x00a2, 0x001b, none, 0x0073, + 0x0004, 0x0038, 0x0011, none, 0x0072, 0x0016, none, 0x003d, + 0x00a4, 0x006a, 0x003b, none, 0x004a, 0x0013, 0x0000, 0x007a, + 0x002e, 0x0071, none, 0x009c, 0x0074, 0x0009, 0x004f, 0x0029, + none, 0x004e, 0x009b, 0x0031, none, 0x001b, none, 0x0073, 0x005f, 0x0032, 0x0078, 0x008e, none, none, none, 0x006c, - 0x009a, none, 0x0036, none, 0x0050, 0x009c, 0x007d, none, - 0x008c, 0x0005, 0x001a, 0x000c, 0x0098, 0x00a7, 0x0092, none, - none, 0x008d, 0x0094, 0x0015, 0x0083, 0x0043, none, none, - 0x000d, none, 0x0007, none, 0x0025, 0x007f, 0x001d, none, - 0x0076, 0x009d, 0x0064, 0x0085, none, none, none, 0x000f, - 0x007e, none, 0x009e, 0x0017, 0x0012, 0x0010, none, 0x0021, + none, none, 0x0036, none, 0x0050, 0x009f, 0x007d, none, + 0x008c, 0x0006, 0x001a, 0x000c, 0x009e, 0x0094, 0x0092, none, + none, 0x008d, 0x00a0, 0x0015, 0x0083, 0x0043, none, none, + 0x000f, 0x00a7, 0x0008, none, 0x0025, 0x007f, 0x001c, none, + 0x0076, none, 0x0064, 0x0085, none, none, none, 0x0010, + 0x007e, none, none, 0x0017, 0x0012, 0x000e, none, 0x0021, 0x008b, 0x0079, 0x0001, none, none, 0x002d, none, none, none, 0x0086, none, 0x0062, none, 0x0057, 0x0042, 0x0039, }; diff --git a/lib/mesa/src/amd/vulkan/radv_entrypoints.h b/lib/mesa/src/amd/vulkan/radv_entrypoints.h index daeba391b..283aa1f33 100644 --- a/lib/mesa/src/amd/vulkan/radv_entrypoints.h +++ b/lib/mesa/src/amd/vulkan/radv_entrypoints.h @@ -2,25 +2,25 @@ struct radv_dispatch_table { union { - void *entrypoints[168]; + void *entrypoints[169]; struct { PFN_vkCreateInstance CreateInstance; PFN_vkDestroyInstance DestroyInstance; PFN_vkEnumeratePhysicalDevices EnumeratePhysicalDevices; - PFN_vkGetDeviceProcAddr GetDeviceProcAddr; - PFN_vkGetInstanceProcAddr GetInstanceProcAddr; - PFN_vkGetPhysicalDeviceProperties GetPhysicalDeviceProperties; - PFN_vkGetPhysicalDeviceQueueFamilyProperties GetPhysicalDeviceQueueFamilyProperties; - PFN_vkGetPhysicalDeviceMemoryProperties GetPhysicalDeviceMemoryProperties; PFN_vkGetPhysicalDeviceFeatures GetPhysicalDeviceFeatures; PFN_vkGetPhysicalDeviceFormatProperties GetPhysicalDeviceFormatProperties; PFN_vkGetPhysicalDeviceImageFormatProperties GetPhysicalDeviceImageFormatProperties; + PFN_vkGetPhysicalDeviceProperties GetPhysicalDeviceProperties; + PFN_vkGetPhysicalDeviceQueueFamilyProperties GetPhysicalDeviceQueueFamilyProperties; + PFN_vkGetPhysicalDeviceMemoryProperties GetPhysicalDeviceMemoryProperties; + PFN_vkGetInstanceProcAddr GetInstanceProcAddr; + PFN_vkGetDeviceProcAddr GetDeviceProcAddr; PFN_vkCreateDevice CreateDevice; PFN_vkDestroyDevice DestroyDevice; - PFN_vkEnumerateInstanceLayerProperties EnumerateInstanceLayerProperties; PFN_vkEnumerateInstanceExtensionProperties 
EnumerateInstanceExtensionProperties; - PFN_vkEnumerateDeviceLayerProperties EnumerateDeviceLayerProperties; PFN_vkEnumerateDeviceExtensionProperties EnumerateDeviceExtensionProperties; + PFN_vkEnumerateInstanceLayerProperties EnumerateInstanceLayerProperties; + PFN_vkEnumerateDeviceLayerProperties EnumerateDeviceLayerProperties; PFN_vkGetDeviceQueue GetDeviceQueue; PFN_vkQueueSubmit QueueSubmit; PFN_vkQueueWaitIdle QueueWaitIdle; @@ -32,10 +32,10 @@ struct radv_dispatch_table { PFN_vkFlushMappedMemoryRanges FlushMappedMemoryRanges; PFN_vkInvalidateMappedMemoryRanges InvalidateMappedMemoryRanges; PFN_vkGetDeviceMemoryCommitment GetDeviceMemoryCommitment; - PFN_vkGetBufferMemoryRequirements GetBufferMemoryRequirements; PFN_vkBindBufferMemory BindBufferMemory; - PFN_vkGetImageMemoryRequirements GetImageMemoryRequirements; PFN_vkBindImageMemory BindImageMemory; + PFN_vkGetBufferMemoryRequirements GetBufferMemoryRequirements; + PFN_vkGetImageMemoryRequirements GetImageMemoryRequirements; PFN_vkGetImageSparseMemoryRequirements GetImageSparseMemoryRequirements; PFN_vkGetPhysicalDeviceSparseImageFormatProperties GetPhysicalDeviceSparseImageFormatProperties; PFN_vkQueueBindSparse QueueBindSparse; @@ -151,16 +151,14 @@ struct radv_dispatch_table { PFN_vkGetSwapchainImagesKHR GetSwapchainImagesKHR; PFN_vkAcquireNextImageKHR AcquireNextImageKHR; PFN_vkQueuePresentKHR QueuePresentKHR; -#ifdef VK_USE_PLATFORM_WAYLAND_KHR - PFN_vkCreateWaylandSurfaceKHR CreateWaylandSurfaceKHR; -#else - void *CreateWaylandSurfaceKHR; -#endif -#ifdef VK_USE_PLATFORM_WAYLAND_KHR - PFN_vkGetPhysicalDeviceWaylandPresentationSupportKHR GetPhysicalDeviceWaylandPresentationSupportKHR; -#else - void *GetPhysicalDeviceWaylandPresentationSupportKHR; -#endif + PFN_vkGetPhysicalDeviceDisplayPropertiesKHR GetPhysicalDeviceDisplayPropertiesKHR; + PFN_vkGetPhysicalDeviceDisplayPlanePropertiesKHR GetPhysicalDeviceDisplayPlanePropertiesKHR; + PFN_vkGetDisplayPlaneSupportedDisplaysKHR GetDisplayPlaneSupportedDisplaysKHR; + PFN_vkGetDisplayModePropertiesKHR GetDisplayModePropertiesKHR; + PFN_vkCreateDisplayModeKHR CreateDisplayModeKHR; + PFN_vkGetDisplayPlaneCapabilitiesKHR GetDisplayPlaneCapabilitiesKHR; + PFN_vkCreateDisplayPlaneSurfaceKHR CreateDisplayPlaneSurfaceKHR; + PFN_vkCreateSharedSwapchainsKHR CreateSharedSwapchainsKHR; #ifdef VK_USE_PLATFORM_XLIB_KHR PFN_vkCreateXlibSurfaceKHR CreateXlibSurfaceKHR; #else @@ -181,21 +179,44 @@ struct radv_dispatch_table { #else void *GetPhysicalDeviceXcbPresentationSupportKHR; #endif - PFN_vkCmdDrawIndirectCountAMD CmdDrawIndirectCountAMD; - PFN_vkCmdDrawIndexedIndirectCountAMD CmdDrawIndexedIndirectCountAMD; - PFN_vkGetPhysicalDeviceFeatures2KHR GetPhysicalDeviceFeatures2KHR; - PFN_vkGetPhysicalDeviceProperties2KHR GetPhysicalDeviceProperties2KHR; - PFN_vkGetPhysicalDeviceFormatProperties2KHR GetPhysicalDeviceFormatProperties2KHR; - PFN_vkGetPhysicalDeviceImageFormatProperties2KHR GetPhysicalDeviceImageFormatProperties2KHR; - PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR GetPhysicalDeviceQueueFamilyProperties2KHR; - PFN_vkGetPhysicalDeviceMemoryProperties2KHR GetPhysicalDeviceMemoryProperties2KHR; - PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR GetPhysicalDeviceSparseImageFormatProperties2KHR; - PFN_vkCmdPushDescriptorSetKHR CmdPushDescriptorSetKHR; - PFN_vkTrimCommandPoolKHR TrimCommandPoolKHR; - PFN_vkCreateDescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplateKHR; - PFN_vkDestroyDescriptorUpdateTemplateKHR DestroyDescriptorUpdateTemplateKHR; - 
PFN_vkUpdateDescriptorSetWithTemplateKHR UpdateDescriptorSetWithTemplateKHR; - PFN_vkCmdPushDescriptorSetWithTemplateKHR CmdPushDescriptorSetWithTemplateKHR; +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + PFN_vkCreateWaylandSurfaceKHR CreateWaylandSurfaceKHR; +#else + void *CreateWaylandSurfaceKHR; +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + PFN_vkGetPhysicalDeviceWaylandPresentationSupportKHR GetPhysicalDeviceWaylandPresentationSupportKHR; +#else + void *GetPhysicalDeviceWaylandPresentationSupportKHR; +#endif +#ifdef VK_USE_PLATFORM_MIR_KHR + PFN_vkCreateMirSurfaceKHR CreateMirSurfaceKHR; +#else + void *CreateMirSurfaceKHR; +#endif +#ifdef VK_USE_PLATFORM_MIR_KHR + PFN_vkGetPhysicalDeviceMirPresentationSupportKHR GetPhysicalDeviceMirPresentationSupportKHR; +#else + void *GetPhysicalDeviceMirPresentationSupportKHR; +#endif +#ifdef VK_USE_PLATFORM_ANDROID_KHR + PFN_vkCreateAndroidSurfaceKHR CreateAndroidSurfaceKHR; +#else + void *CreateAndroidSurfaceKHR; +#endif +#ifdef VK_USE_PLATFORM_WIN32_KHR + PFN_vkCreateWin32SurfaceKHR CreateWin32SurfaceKHR; +#else + void *CreateWin32SurfaceKHR; +#endif +#ifdef VK_USE_PLATFORM_WIN32_KHR + PFN_vkGetPhysicalDeviceWin32PresentationSupportKHR GetPhysicalDeviceWin32PresentationSupportKHR; +#else + void *GetPhysicalDeviceWin32PresentationSupportKHR; +#endif + PFN_vkCreateDebugReportCallbackEXT CreateDebugReportCallbackEXT; + PFN_vkDestroyDebugReportCallbackEXT DestroyDebugReportCallbackEXT; + PFN_vkDebugReportMessageEXT DebugReportMessageEXT; }; }; @@ -205,20 +226,20 @@ struct radv_dispatch_table { VkResult radv_CreateInstance(const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkInstance* pInstance); void radv_DestroyInstance(VkInstance instance, const VkAllocationCallbacks* pAllocator); VkResult radv_EnumeratePhysicalDevices(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); -PFN_vkVoidFunction radv_GetDeviceProcAddr(VkDevice device, const char* pName); -PFN_vkVoidFunction radv_GetInstanceProcAddr(VkInstance instance, const char* pName); -void radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); -void radv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties* pQueueFamilyProperties); -void radv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties); void radv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); void radv_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties); VkResult radv_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties); +void radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); +void radv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties* pQueueFamilyProperties); +void radv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties); +PFN_vkVoidFunction radv_GetInstanceProcAddr(VkInstance instance, const char* pName); +PFN_vkVoidFunction radv_GetDeviceProcAddr(VkDevice device, const char* pName); 
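The member shuffling above is not cosmetic: radv_dispatch_table overlays its typed PFN_vk* members on the flat void *entrypoints[169] array through an anonymous union, so each member's position must equal the index handed to radv_resolve_entrypoint(), and because this import changes how the generator enumerates entrypoints (it now walks the vulkan.h typedefs directly, as the script changes later in this diff show), every member moves with its index. Below is a minimal standalone sketch of that union trick; the toy table, function names, and two-entry size are invented for illustration, the real table being generated:

#include <stdint.h>
#include <stdio.h>

typedef void (*pfn_void)(void);

/* Toy analogue of radv_dispatch_table: typed members and a flat
 * index array share storage, so slots can be filled by name and
 * fetched by number. Requires C11 anonymous struct/union. */
struct toy_dispatch_table {
	union {
		void *entrypoints[2];
		struct {
			pfn_void Hello;   /* index 0 */
			pfn_void Goodbye; /* index 1 */
		};
	};
};

static void hello(void)   { puts("hello"); }
static void goodbye(void) { puts("goodbye"); }

/* Designated initialization by name, as in the radv_layer table. */
static const struct toy_dispatch_table toy_layer = {
	.Hello = hello,
	.Goodbye = goodbye,
};

/* Index-based fetch, as in radv_resolve_entrypoint(). */
static void *toy_resolve(uint32_t index)
{
	return toy_layer.entrypoints[index];
}

int main(void)
{
	((pfn_void)toy_resolve(1))(); /* prints "goodbye" */
	return 0;
}

Converting the void * back to a function pointer is implementation-defined C, but it is the same bet the generated dispatch code makes, and it holds on the platforms this driver builds for.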
VkResult radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDevice* pDevice); void radv_DestroyDevice(VkDevice device, const VkAllocationCallbacks* pAllocator); -VkResult radv_EnumerateInstanceLayerProperties(uint32_t* pPropertyCount, VkLayerProperties* pProperties); VkResult radv_EnumerateInstanceExtensionProperties(const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties); -VkResult radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkLayerProperties* pProperties); VkResult radv_EnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties); +VkResult radv_EnumerateInstanceLayerProperties(uint32_t* pPropertyCount, VkLayerProperties* pProperties); +VkResult radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkLayerProperties* pProperties); void radv_GetDeviceQueue(VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue* pQueue); VkResult radv_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence); VkResult radv_QueueWaitIdle(VkQueue queue); @@ -230,10 +251,10 @@ void radv_UnmapMemory(VkDevice device, VkDeviceMemory memory); VkResult radv_FlushMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges); VkResult radv_InvalidateMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges); void radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory, VkDeviceSize* pCommittedMemoryInBytes); -void radv_GetBufferMemoryRequirements(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements); VkResult radv_BindBufferMemory(VkDevice device, VkBuffer buffer, VkDeviceMemory memory, VkDeviceSize memoryOffset); -void radv_GetImageMemoryRequirements(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements); VkResult radv_BindImageMemory(VkDevice device, VkImage image, VkDeviceMemory memory, VkDeviceSize memoryOffset); +void radv_GetBufferMemoryRequirements(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements); +void radv_GetImageMemoryRequirements(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements); void radv_GetImageSparseMemoryRequirements(VkDevice device, VkImage image, uint32_t* pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements* pSparseMemoryRequirements); void radv_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t* pPropertyCount, VkSparseImageFormatProperties* pProperties); VkResult radv_QueueBindSparse(VkQueue queue, uint32_t bindInfoCount, const VkBindSparseInfo* pBindInfo, VkFence fence); @@ -312,14 +333,14 @@ void radv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t void radv_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance); void radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride); void radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t 
stride); -void radv_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ); +void radv_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z); void radv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset); void radv_CmdCopyBuffer(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferCopy* pRegions); void radv_CmdCopyImage(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageCopy* pRegions); void radv_CmdBlitImage(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkFilter filter); void radv_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions); void radv_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions); -void radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const void* pData); +void radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const uint32_t* pData); void radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize size, uint32_t data); void radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); void radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); @@ -349,12 +370,14 @@ void radv_DestroySwapchainKHR(VkDevice device, VkSwapchainKHR swapchain, const V VkResult radv_GetSwapchainImagesKHR(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pSwapchainImageCount, VkImage* pSwapchainImages); VkResult radv_AcquireNextImageKHR(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, VkFence fence, uint32_t* pImageIndex); VkResult radv_QueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* pPresentInfo); -#ifdef VK_USE_PLATFORM_WAYLAND_KHR -VkResult radv_CreateWaylandSurfaceKHR(VkInstance instance, const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); -#endif // VK_USE_PLATFORM_WAYLAND_KHR -#ifdef VK_USE_PLATFORM_WAYLAND_KHR -VkBool32 radv_GetPhysicalDeviceWaylandPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, struct wl_display* display); -#endif // VK_USE_PLATFORM_WAYLAND_KHR +VkResult radv_GetPhysicalDeviceDisplayPropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPropertiesKHR* pProperties); +VkResult radv_GetPhysicalDeviceDisplayPlanePropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPlanePropertiesKHR* pProperties); +VkResult radv_GetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physicalDevice, uint32_t planeIndex, uint32_t* pDisplayCount, VkDisplayKHR* pDisplays); +VkResult 
radv_GetDisplayModePropertiesKHR(VkPhysicalDevice physicalDevice, VkDisplayKHR display, uint32_t* pPropertyCount, VkDisplayModePropertiesKHR* pProperties); +VkResult radv_CreateDisplayModeKHR(VkPhysicalDevice physicalDevice, VkDisplayKHR display, const VkDisplayModeCreateInfoKHR*pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDisplayModeKHR* pMode); +VkResult radv_GetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physicalDevice, VkDisplayModeKHR mode, uint32_t planeIndex, VkDisplayPlaneCapabilitiesKHR* pCapabilities); +VkResult radv_CreateDisplayPlaneSurfaceKHR(VkInstance instance, const VkDisplaySurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +VkResult radv_CreateSharedSwapchainsKHR(VkDevice device, uint32_t swapchainCount, const VkSwapchainCreateInfoKHR* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchains); #ifdef VK_USE_PLATFORM_XLIB_KHR VkResult radv_CreateXlibSurfaceKHR(VkInstance instance, const VkXlibSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); #endif // VK_USE_PLATFORM_XLIB_KHR @@ -367,18 +390,27 @@ VkResult radv_CreateXcbSurfaceKHR(VkInstance instance, const VkXcbSurfaceCreateI #ifdef VK_USE_PLATFORM_XCB_KHR VkBool32 radv_GetPhysicalDeviceXcbPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, xcb_connection_t* connection, xcb_visualid_t visual_id); #endif // VK_USE_PLATFORM_XCB_KHR -void radv_CmdDrawIndirectCountAMD(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride); -void radv_CmdDrawIndexedIndirectCountAMD(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride); -void radv_GetPhysicalDeviceFeatures2KHR(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures2KHR* pFeatures); -void radv_GetPhysicalDeviceProperties2KHR(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties2KHR* pProperties); -void radv_GetPhysicalDeviceFormatProperties2KHR(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties2KHR* pFormatProperties); -VkResult radv_GetPhysicalDeviceImageFormatProperties2KHR(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceImageFormatInfo2KHR* pImageFormatInfo, VkImageFormatProperties2KHR* pImageFormatProperties); -void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties2KHR* pQueueFamilyProperties); -void radv_GetPhysicalDeviceMemoryProperties2KHR(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties2KHR* pMemoryProperties); -void radv_GetPhysicalDeviceSparseImageFormatProperties2KHR(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSparseImageFormatInfo2KHR* pFormatInfo, uint32_t* pPropertyCount, VkSparseImageFormatProperties2KHR* pProperties); -void radv_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t set, uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites); -void radv_TrimCommandPoolKHR(VkDevice device, VkCommandPool commandPool, VkCommandPoolTrimFlagsKHR flags); -VkResult radv_CreateDescriptorUpdateTemplateKHR(VkDevice device, const VkDescriptorUpdateTemplateCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, 
VkDescriptorUpdateTemplateKHR* pDescriptorUpdateTemplate); -void radv_DestroyDescriptorUpdateTemplateKHR(VkDevice device, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, const VkAllocationCallbacks* pAllocator); -void radv_UpdateDescriptorSetWithTemplateKHR(VkDevice device, VkDescriptorSet descriptorSet, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, const void* pData); -void radv_CmdPushDescriptorSetWithTemplateKHR(VkCommandBuffer commandBuffer, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, VkPipelineLayout layout, uint32_t set, const void* pData); +#ifdef VK_USE_PLATFORM_WAYLAND_KHR +VkResult radv_CreateWaylandSurfaceKHR(VkInstance instance, const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +#endif // VK_USE_PLATFORM_WAYLAND_KHR +#ifdef VK_USE_PLATFORM_WAYLAND_KHR +VkBool32 radv_GetPhysicalDeviceWaylandPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, struct wl_display* display); +#endif // VK_USE_PLATFORM_WAYLAND_KHR +#ifdef VK_USE_PLATFORM_MIR_KHR +VkResult radv_CreateMirSurfaceKHR(VkInstance instance, const VkMirSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +#endif // VK_USE_PLATFORM_MIR_KHR +#ifdef VK_USE_PLATFORM_MIR_KHR +VkBool32 radv_GetPhysicalDeviceMirPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, MirConnection* connection); +#endif // VK_USE_PLATFORM_MIR_KHR +#ifdef VK_USE_PLATFORM_ANDROID_KHR +VkResult radv_CreateAndroidSurfaceKHR(VkInstance instance, const VkAndroidSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +#endif // VK_USE_PLATFORM_ANDROID_KHR +#ifdef VK_USE_PLATFORM_WIN32_KHR +VkResult radv_CreateWin32SurfaceKHR(VkInstance instance, const VkWin32SurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +#endif // VK_USE_PLATFORM_WIN32_KHR +#ifdef VK_USE_PLATFORM_WIN32_KHR +VkBool32 radv_GetPhysicalDeviceWin32PresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex); +#endif // VK_USE_PLATFORM_WIN32_KHR +VkResult radv_CreateDebugReportCallbackEXT(VkInstance instance, const VkDebugReportCallbackCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugReportCallbackEXT* pCallback); +void radv_DestroyDebugReportCallbackEXT(VkInstance instance, VkDebugReportCallbackEXT callback, const VkAllocationCallbacks* pAllocator); +void radv_DebugReportMessageEXT(VkInstance instance, VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType, uint64_t object, size_t location, int32_t messageCode, const char* pLayerPrefix, const char* pMessage); diff --git a/lib/mesa/src/amd/vulkan/radv_entrypoints_gen.py b/lib/mesa/src/amd/vulkan/radv_entrypoints_gen.py index 3474c789e..a6e832a0a 100644 --- a/lib/mesa/src/amd/vulkan/radv_entrypoints_gen.py +++ b/lib/mesa/src/amd/vulkan/radv_entrypoints_gen.py @@ -22,27 +22,14 @@ # IN THE SOFTWARE. 
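The rewritten map[] earlier in this diff is machine output and churns whenever the entrypoint set changes: it is a 256-slot open-addressed hash table of uint16_t entrypoint indices, with 0xffff (none) marking empty slots, and the stats comment is the generator grading its own work; the new collision counts sum to 169, the size of the new entrypoints array, while the old counts summed to 168. The generator script that follows builds the table by hashing each "vkFoo" name and, on collision, stepping by a fixed prime (h = h + prime_step) until a free slot turns up; lookup replays the same probe sequence. Here is a sketch of that lookup side, with placeholder hash constants (the hash() body is elided from this hunk) and stand-in parameters for the generated strings/offsets/map data:

#include <stdint.h>
#include <string.h>

#define HASH_SIZE 256
#define HASH_MASK (HASH_SIZE - 1)
#define NONE      0xffff

/* Placeholder LCG constants: the real values live in the script's
 * hash() helper, which this hunk does not show. */
#define PRIME_FACTOR 5024183u
#define PRIME_STEP   19u

static uint32_t name_hash(const char *name)
{
	uint32_t h = 0;
	for (; *name; name++)
		h = h * PRIME_FACTOR + (unsigned char)*name;
	return h;
}

/* names[] stands in for the generated strings/offsets pair; map[]
 * stands in for the generated 256-entry probe table. Returns the
 * entrypoint index for a "vkFoo"-style name, or -1 if absent. */
static int lookup_entrypoint(const char *const names[],
			     const uint16_t map[HASH_SIZE],
			     const char *name)
{
	uint32_t h = name_hash(name);

	for (;;) {
		uint16_t i = map[h & HASH_MASK];
		if (i == NONE)
			return -1;       /* hit an empty slot: not present */
		if (strcmp(names[i], name) == 0)
			return i;        /* found after zero or more probes */
		h += PRIME_STEP;         /* collision: next probe position */
	}
}

The probe count per name is exactly what the histogram records, and the string table this searches is packed as consecutive "vkName\0" runs, hence the offsets arithmetic i += 2 + len(name) + 1 in the script: two bytes for the "vk" prefix, then the name, then the terminating NUL.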
# -import sys -import xml.etree.ElementTree as ET - -max_api_version = 1.0 - -supported_extensions = [ - 'VK_AMD_draw_indirect_count', - 'VK_NV_dedicated_allocation', - 'VK_KHR_descriptor_update_template', - 'VK_KHR_get_physical_device_properties2', - 'VK_KHR_incremental_present', - 'VK_KHR_maintenance1', - 'VK_KHR_push_descriptor', - 'VK_KHR_sampler_mirror_clamp_to_edge', - 'VK_KHR_shader_draw_parameters', - 'VK_KHR_surface', - 'VK_KHR_swapchain', - 'VK_KHR_wayland_surface', - 'VK_KHR_xcb_surface', - 'VK_KHR_xlib_surface', -] +import fileinput, re, sys + +# Each function typedef in the vulkan.h header is all on one line and matches +# this regex. We hope that won't change. + +p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? *\*PFN_vk([^(]*)\)(.*);') + +entrypoints = [] # We generate a static hash table for entry point lookup # (vkGetProcAddress). We use a linear congruential generator for our hash @@ -64,11 +51,29 @@ def hash(name): return h -def print_guard_start(guard): +def get_platform_guard_macro(name): + if "Xlib" in name: + return "VK_USE_PLATFORM_XLIB_KHR" + elif "Xcb" in name: + return "VK_USE_PLATFORM_XCB_KHR" + elif "Wayland" in name: + return "VK_USE_PLATFORM_WAYLAND_KHR" + elif "Mir" in name: + return "VK_USE_PLATFORM_MIR_KHR" + elif "Android" in name: + return "VK_USE_PLATFORM_ANDROID_KHR" + elif "Win32" in name: + return "VK_USE_PLATFORM_WIN32_KHR" + else: + return None + +def print_guard_start(name): + guard = get_platform_guard_macro(name) if guard is not None: print "#ifdef {0}".format(guard) -def print_guard_end(guard): +def print_guard_end(name): + guard = get_platform_guard_macro(name) if guard is not None: print "#endif // {0}".format(guard) @@ -82,61 +87,18 @@ elif (sys.argv[1] == "code"): opt_code = True sys.argv.pop() -# Extract the entry points from the registry -def get_entrypoints(doc, entrypoints_to_defines): - entrypoints = [] - - enabled_commands = set() - for feature in doc.findall('./feature'): - assert feature.attrib['api'] == 'vulkan' - if float(feature.attrib['number']) > max_api_version: - continue - - for command in feature.findall('./require/command'): - enabled_commands.add(command.attrib['name']) +# Parse the entry points in the header - for extension in doc.findall('.extensions/extension'): - if extension.attrib['name'] not in supported_extensions: +i = 0 +for line in fileinput.input(): + m = p.match(line) + if (m): + if m.group(2) == 'VoidFunction': continue - - assert extension.attrib['supported'] == 'vulkan' - for command in extension.findall('./require/command'): - enabled_commands.add(command.attrib['name']) - - index = 0 - for command in doc.findall('./commands/command'): - type = command.find('./proto/type').text - fullname = command.find('./proto/name').text - - if fullname not in enabled_commands: - continue - - shortname = fullname[2:] - params = map(lambda p: "".join(p.itertext()), command.findall('./param')) - params = ', '.join(params) - if fullname in entrypoints_to_defines: - guard = entrypoints_to_defines[fullname] - else: - guard = None - entrypoints.append((type, shortname, params, index, hash(fullname), guard)) - index += 1 - - return entrypoints - -# Maps entry points to extension defines -def get_entrypoints_defines(doc): - entrypoints_to_defines = {} - extensions = doc.findall('./extensions/extension') - for extension in extensions: - define = extension.get('protect') - entrypoints = extension.findall('./require/command') - for entrypoint in entrypoints: - fullname = entrypoint.get('name') - entrypoints_to_defines[fullname]
= define - return entrypoints_to_defines - -doc = ET.parse(sys.stdin) -entrypoints = get_entrypoints(doc, get_entrypoints_defines(doc)) + fullname = "vk" + m.group(2) + h = hash(fullname) + entrypoints.append((m.group(1), m.group(2), m.group(3), i, h)) + i = i + 1 # For outputting entrypoints.h we generate a radv_EntryPoint() prototype # per entry point. @@ -149,7 +111,8 @@ if opt_header: print " void *entrypoints[%d];" % len(entrypoints) print " struct {" - for type, name, args, num, h, guard in entrypoints: + for type, name, args, num, h in entrypoints: + guard = get_platform_guard_macro(name) if guard is not None: print "#ifdef {0}".format(guard) print " PFN_vk{0} {0};".format(name) @@ -162,10 +125,10 @@ if opt_header: print " };\n" print "};\n" - for type, name, args, num, h, guard in entrypoints: - print_guard_start(guard) - print "%s radv_%s(%s);" % (type, name, args) - print_guard_end(guard) + for type, name, args, num, h in entrypoints: + print_guard_start(name) + print "%s radv_%s%s;" % (type, name, args) + print_guard_end(name) exit() @@ -211,7 +174,7 @@ static const char strings[] =""" offsets = [] i = 0; -for type, name, args, num, h, guard in entrypoints: +for type, name, args, num, h in entrypoints: print " \"vk%s\\0\"" % name offsets.append(i) i += 2 + len(name) + 1 @@ -220,7 +183,7 @@ print " ;" # Now generate the table of all entry points print "\nstatic const struct radv_entrypoint entrypoints[] = {" -for type, name, args, num, h, guard in entrypoints: +for type, name, args, num, h in entrypoints: print " { %5d, 0x%08x }," % (offsets[num], h) print "};\n" @@ -233,20 +196,20 @@ print """ """ for layer in [ "radv" ]: - for type, name, args, num, h, guard in entrypoints: - print_guard_start(guard) - print "%s %s_%s(%s) __attribute__ ((weak));" % (type, layer, name, args) - print_guard_end(guard) + for type, name, args, num, h in entrypoints: + print_guard_start(name) + print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args) + print_guard_end(name) print "\nconst struct radv_dispatch_table %s_layer = {" % layer - for type, name, args, num, h, guard in entrypoints: - print_guard_start(guard) + for type, name, args, num, h in entrypoints: + print_guard_start(name) print " .%s = %s_%s," % (name, layer, name) - print_guard_end(guard) + print_guard_end(name) print "};\n" print """ -static void * __attribute__ ((noinline)) +void * __attribute__ ((noinline)) radv_resolve_entrypoint(uint32_t index) { return radv_layer.entrypoints[index]; @@ -259,7 +222,7 @@ radv_resolve_entrypoint(uint32_t index) map = [none for f in xrange(hash_size)] collisions = [0 for f in xrange(10)] -for type, name, args, num, h, guard in entrypoints: +for type, name, args, num, h in entrypoints: level = 0 while map[h & hash_mask] != none: h = h + prime_step diff --git a/lib/mesa/src/amd/vulkan/radv_formats.c b/lib/mesa/src/amd/vulkan/radv_formats.c index 61cc67398..fe786b3a4 100644 --- a/lib/mesa/src/amd/vulkan/radv_formats.c +++ b/lib/mesa/src/amd/vulkan/radv_formats.c @@ -30,7 +30,6 @@ #include "util/u_half.h" #include "util/format_srgb.h" -#include "util/format_r11g11b10f.h" uint32_t radv_translate_buffer_dataformat(const struct vk_format_description *desc, int first_non_void) @@ -395,7 +394,7 @@ uint32_t radv_translate_color_numformat(VkFormat format, int first_non_void) { unsigned ntype; - if (first_non_void == -1 || desc->channel[first_non_void].type == VK_FORMAT_TYPE_FLOAT) + if (first_non_void == 4 || desc->channel[first_non_void].type == VK_FORMAT_TYPE_FLOAT) ntype = 
V_028C70_NUMBER_FLOAT; else { ntype = V_028C70_NUMBER_UNORM; @@ -498,7 +497,7 @@ static bool radv_is_storage_image_format_supported(struct radv_physical_device * } } -static bool radv_is_buffer_format_supported(VkFormat format, bool *scaled) +static bool radv_is_buffer_format_supported(VkFormat format) { const struct vk_format_description *desc = vk_format_description(format); unsigned data_format, num_format; @@ -510,7 +509,6 @@ static bool radv_is_buffer_format_supported(VkFormat format, bool *scaled) num_format = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format)); - *scaled = (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) || (num_format == V_008F0C_BUF_NUM_FORMAT_USCALED); return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID && num_format != ~0; } @@ -537,7 +535,7 @@ bool radv_is_colorbuffer_format_supported(VkFormat format, bool *blendable) static bool radv_is_zs_format_supported(VkFormat format) { - return radv_translate_dbformat(format) != V_028040_Z_INVALID || format == VK_FORMAT_S8_UINT; + return radv_translate_dbformat(format) != V_028040_Z_INVALID; } static void @@ -548,7 +546,6 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0; const struct vk_format_description *desc = vk_format_description(format); bool blendable; - bool scaled = false; if (!desc) { out_properties->linearTilingFeatures = linear; out_properties->optimalTilingFeatures = tiled; @@ -561,22 +558,18 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; } - if (radv_is_buffer_format_supported(format, &scaled)) { - buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; - if (!scaled) - buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT | - VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; + if (radv_is_buffer_format_supported(format)) { + buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT | + VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT | + VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; } if (vk_format_is_depth_or_stencil(format)) { - if (radv_is_zs_format_supported(format)) { + if (radv_is_zs_format_supported(format)) tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; - tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT | - VK_FORMAT_FEATURE_BLIT_DST_BIT; - tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | - VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; - } + tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT | + VK_FORMAT_FEATURE_BLIT_DST_BIT; } else { bool linear_sampling; if (radv_is_sampler_format_supported(format, &linear_sampling)) { @@ -597,15 +590,6 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; } } - if (tiled && util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) { - tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | - VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; - } - } - - if (linear && util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) { - linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | - VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; } if (format == VK_FORMAT_R32_UINT || format == VK_FORMAT_R32_SINT) { @@ -746,6 +730,9 @@ uint32_t radv_translate_dbformat(VkFormat format) case VK_FORMAT_D16_UNORM: case VK_FORMAT_D16_UNORM_S8_UINT: return V_028040_Z_16; + case VK_FORMAT_X8_D24_UNORM_PACK32: + case VK_FORMAT_D24_UNORM_S8_UINT: + return 
V_028040_Z_24; /* deprecated on SI */ case VK_FORMAT_D32_SFLOAT: case VK_FORMAT_D32_SFLOAT_S8_UINT: return V_028040_Z_32_FLOAT; @@ -864,10 +851,6 @@ bool radv_format_pack_clear_color(VkFormat format, clear_vals[0] = value->uint32[0] & 0xff; clear_vals[1] = 0; break; - case VK_FORMAT_R8_SINT: - clear_vals[0] = value->int32[0] & 0xff; - clear_vals[1] = 0; - break; case VK_FORMAT_R16_UINT: clear_vals[0] = value->uint32[0] & 0xffff; clear_vals[1] = 0; @@ -877,11 +860,6 @@ bool radv_format_pack_clear_color(VkFormat format, clear_vals[0] |= (value->uint32[1] & 0xff) << 8; clear_vals[1] = 0; break; - case VK_FORMAT_R8G8_SINT: - clear_vals[0] = value->int32[0] & 0xff; - clear_vals[0] |= (value->int32[1] & 0xff) << 8; - clear_vals[1] = 0; - break; case VK_FORMAT_R8G8B8A8_UINT: clear_vals[0] = value->uint32[0] & 0xff; clear_vals[0] |= (value->uint32[1] & 0xff) << 8; @@ -889,13 +867,6 @@ bool radv_format_pack_clear_color(VkFormat format, clear_vals[0] |= (value->uint32[3] & 0xff) << 24; clear_vals[1] = 0; break; - case VK_FORMAT_R8G8B8A8_SINT: - clear_vals[0] = value->int32[0] & 0xff; - clear_vals[0] |= (value->int32[1] & 0xff) << 8; - clear_vals[0] |= (value->int32[2] & 0xff) << 16; - clear_vals[0] |= (value->int32[3] & 0xff) << 24; - clear_vals[1] = 0; - break; case VK_FORMAT_A8B8G8R8_UINT_PACK32: clear_vals[0] = value->uint32[0] & 0xff; clear_vals[0] |= (value->uint32[1] & 0xff) << 8; @@ -957,12 +928,8 @@ bool radv_format_pack_clear_color(VkFormat format, clear_vals[1] |= ((uint16_t)util_iround(CLAMP(value->float32[3], 0.0f, 1.0f) * 0xffff)) << 16; break; case VK_FORMAT_A2B10G10R10_UNORM_PACK32: - clear_vals[0] = ((uint16_t)util_iround(CLAMP(value->float32[0], 0.0f, 1.0f) * 0x3ff)) & 0x3ff; - clear_vals[0] |= (((uint16_t)util_iround(CLAMP(value->float32[1], 0.0f, 1.0f) * 0x3ff)) & 0x3ff) << 10; - clear_vals[0] |= (((uint16_t)util_iround(CLAMP(value->float32[2], 0.0f, 1.0f) * 0x3ff)) & 0x3ff) << 20; - clear_vals[0] |= (((uint16_t)util_iround(CLAMP(value->float32[3], 0.0f, 1.0f) * 0x3)) & 0x3) << 30; - clear_vals[1] = 0; - return true; + /* TODO */ + return false; case VK_FORMAT_R32G32_SFLOAT: clear_vals[0] = fui(value->float32[0]); clear_vals[1] = fui(value->float32[1]); @@ -971,10 +938,6 @@ bool radv_format_pack_clear_color(VkFormat format, clear_vals[1] = 0; clear_vals[0] = fui(value->float32[0]); break; - case VK_FORMAT_B10G11R11_UFLOAT_PACK32: - clear_vals[0] = float3_to_r11g11b10f(value->float32); - clear_vals[1] = 0; - break; default: fprintf(stderr, "failed to fast clear %d\n", format); return false; @@ -994,18 +957,6 @@ void radv_GetPhysicalDeviceFormatProperties( pFormatProperties); } -void radv_GetPhysicalDeviceFormatProperties2KHR( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkFormatProperties2KHR* pFormatProperties) -{ - RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice); - - radv_physical_device_get_format_properties(physical_device, - format, - &pFormatProperties->formatProperties); -} - VkResult radv_GetPhysicalDeviceImageFormatProperties( VkPhysicalDevice physicalDevice, VkFormat format, @@ -1120,20 +1071,6 @@ unsupported: return VK_ERROR_FORMAT_NOT_SUPPORTED; } -VkResult radv_GetPhysicalDeviceImageFormatProperties2KHR( - VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceImageFormatInfo2KHR* pImageFormatInfo, - VkImageFormatProperties2KHR *pImageFormatProperties) -{ - return radv_GetPhysicalDeviceImageFormatProperties(physicalDevice, - pImageFormatInfo->format, - pImageFormatInfo->type, - pImageFormatInfo->tiling, - pImageFormatInfo->usage, - 
pImageFormatInfo->flags, - &pImageFormatProperties->imageFormatProperties); -} - void radv_GetPhysicalDeviceSparseImageFormatProperties( VkPhysicalDevice physicalDevice, VkFormat format, @@ -1147,13 +1084,3 @@ void radv_GetPhysicalDeviceSparseImageFormatProperties( /* Sparse images are not yet supported. */ *pNumProperties = 0; } - -void radv_GetPhysicalDeviceSparseImageFormatProperties2KHR( - VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceSparseImageFormatInfo2KHR* pFormatInfo, - uint32_t *pPropertyCount, - VkSparseImageFormatProperties2KHR* pProperties) -{ - /* Sparse images are not yet supported. */ - *pPropertyCount = 0; -} diff --git a/lib/mesa/src/amd/vulkan/radv_image.c b/lib/mesa/src/amd/vulkan/radv_image.c index 7cf9c6765..9649158ea 100644 --- a/lib/mesa/src/amd/vulkan/radv_image.c +++ b/lib/mesa/src/amd/vulkan/radv_image.c @@ -41,13 +41,6 @@ radv_choose_tiling(struct radv_device *Device, return RADEON_SURF_MODE_LINEAR_ALIGNED; } - /* Textures with a very small height are recommended to be linear. */ - if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D || - /* Only very thin and long 2D textures should benefit from - * linear_aligned. */ - (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2)) - return RADEON_SURF_MODE_LINEAR_ALIGNED; - /* MSAA resources must be 2D tiled. */ if (pCreateInfo->samples > 1) return RADEON_SURF_MODE_2D; @@ -119,8 +112,8 @@ radv_init_surface(struct radv_device *device, VK_IMAGE_USAGE_STORAGE_BIT)) || (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) || (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) || - device->physical_device->rad_info.chip_class < VI || - create_info->scanout || (device->debug_flags & RADV_DEBUG_NO_DCC) || + device->instance->physicalDevice.rad_info.chip_class < VI || + create_info->scanout || !device->allow_dcc || !radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable)) surface->flags |= RADEON_SURF_DISABLE_DCC; if (create_info->scanout) @@ -130,7 +123,7 @@ radv_init_surface(struct radv_device *device, #define ATI_VENDOR_ID 0x1002 static uint32_t si_get_bo_metadata_word1(struct radv_device *device) { - return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id; + return (ATI_VENDOR_ID << 16) | device->instance->physicalDevice.rad_info.pci_id; } static inline unsigned @@ -185,11 +178,6 @@ radv_make_buffer_descriptor(struct radv_device *device, state[0] = va; state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); - - if (device->physical_device->rad_info.chip_class < VI && stride) { - range /= stride; - } - state[2] = range; state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) | S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) | @@ -213,7 +201,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, state[1] &= C_008F14_BASE_ADDRESS_HI; state[3] &= C_008F1C_TILING_INDEX; - state[4] &= C_008F20_PITCH_GFX6; + state[4] &= C_008F20_PITCH; state[6] &= C_008F28_COMPRESSION_EN; assert(!(va & 255)); @@ -222,7 +210,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40); state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image, base_level, is_stencil)); - state[4] |= S_008F20_PITCH_GFX6(pitch - 1); + state[4] |= S_008F20_PITCH(pitch - 1); if (image->surface.dcc_size && image->surface.level[first_level].dcc_enabled) { state[6] |= S_008F28_COMPRESSION_EN(1); @@ -309,8 +297,8 @@ si_make_texture_descriptor(struct radv_device *device, depth = image->array_size / 6; state[0] = 0; - state[1] = 
(S_008F14_DATA_FORMAT_GFX6(data_format) | - S_008F14_NUM_FORMAT_GFX6(num_format)); + state[1] = (S_008F14_DATA_FORMAT(data_format) | + S_008F14_NUM_FORMAT(num_format)); state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1)); state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | @@ -338,7 +326,7 @@ si_make_texture_descriptor(struct radv_device *device, /* The last dword is unused by hw. The shader uses it to clear * bits in the first dword of sampler state. */ - if (device->physical_device->rad_info.chip_class <= CIK && image->samples <= 1) { + if (device->instance->physicalDevice.rad_info.chip_class <= CIK && image->samples <= 1) { if (first_level == last_level) state[7] = C_008F30_MAX_ANISO_RATIO; else @@ -371,8 +359,8 @@ si_make_texture_descriptor(struct radv_device *device, fmask_state[0] = va >> 8; fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | - S_008F14_DATA_FORMAT_GFX6(fmask_format) | - S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_UINT); + S_008F14_DATA_FORMAT(fmask_format) | + S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT); fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1); fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | @@ -382,13 +370,12 @@ si_make_texture_descriptor(struct radv_device *device, S_008F1C_TILING_INDEX(image->fmask.tile_mode_index) | S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false)); fmask_state[4] = S_008F20_DEPTH(depth - 1) | - S_008F20_PITCH_GFX6(image->fmask.pitch_in_pixels - 1); + S_008F20_PITCH(image->fmask.pitch_in_pixels - 1); fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) | S_008F24_LAST_ARRAY(last_layer); fmask_state[6] = 0; fmask_state[7] = 0; - } else if (fmask_state) - memset(fmask_state, 0, 8 * 4); + } } static void @@ -523,7 +510,6 @@ radv_image_alloc_fmask(struct radv_device *device, image->fmask.offset = align64(image->size, image->fmask.alignment); image->size = image->fmask.offset + image->fmask.size; - image->alignment = MAX2(image->alignment, image->fmask.alignment); } static void @@ -531,8 +517,8 @@ radv_image_get_cmask_info(struct radv_device *device, struct radv_image *image, struct radv_cmask_info *out) { - unsigned pipe_interleave_bytes = device->physical_device->rad_info.pipe_interleave_bytes; - unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes; + unsigned pipe_interleave_bytes = device->instance->physicalDevice.rad_info.pipe_interleave_bytes; + unsigned num_pipes = device->instance->physicalDevice.rad_info.num_tile_pipes; unsigned cl_width, cl_height; switch (num_pipes) { @@ -566,6 +552,10 @@ radv_image_get_cmask_info(struct radv_device *device, /* Each element of CMASK is a nibble. 
*/ unsigned slice_bytes = slice_elements / 2; + out->pitch = width; + out->height = height; + out->xalign = cl_width * 8; + out->yalign = cl_height * 8; out->slice_tile_max = (width * height) / (128*128); if (out->slice_tile_max) out->slice_tile_max -= 1; @@ -585,7 +575,6 @@ radv_image_alloc_cmask(struct radv_device *device, /* + 8 for storing the clear values */ image->clear_value_offset = image->cmask.offset + image->cmask.size; image->size = image->cmask.offset + image->cmask.size + 8; - image->alignment = MAX2(image->alignment, image->cmask.alignment); } static void @@ -596,24 +585,88 @@ radv_image_alloc_dcc(struct radv_device *device, /* + 8 for storing the clear values */ image->clear_value_offset = image->dcc_offset + image->surface.dcc_size; image->size = image->dcc_offset + image->surface.dcc_size + 8; - image->alignment = MAX2(image->alignment, image->surface.dcc_alignment); +} + +static unsigned +radv_image_get_htile_size(struct radv_device *device, + struct radv_image *image) +{ + unsigned cl_width, cl_height, width, height; + unsigned slice_elements, slice_bytes, base_align; + unsigned num_pipes = device->instance->physicalDevice.rad_info.num_tile_pipes; + unsigned pipe_interleave_bytes = device->instance->physicalDevice.rad_info.pipe_interleave_bytes; + + /* Overalign HTILE on P2 configs to work around GPU hangs in + * piglit/depthstencil-render-miplevels 585. + * + * This has been confirmed to help Kabini & Stoney, where the hangs + * are always reproducible. I think I have seen the test hang + * on Carrizo too, though it was very rare there. + */ + if (device->instance->physicalDevice.rad_info.chip_class >= CIK && num_pipes < 4) + num_pipes = 4; + + switch (num_pipes) { + case 1: + cl_width = 32; + cl_height = 16; + break; + case 2: + cl_width = 32; + cl_height = 32; + break; + case 4: + cl_width = 64; + cl_height = 32; + break; + case 8: + cl_width = 64; + cl_height = 64; + break; + case 16: + cl_width = 128; + cl_height = 64; + break; + default: + assert(0); + return 0; + } + + width = align(image->surface.npix_x, cl_width * 8); + height = align(image->surface.npix_y, cl_height * 8); + + slice_elements = (width * height) / (8 * 8); + slice_bytes = slice_elements * 4; + + base_align = num_pipes * pipe_interleave_bytes; + + image->htile.pitch = width; + image->htile.height = height; + image->htile.xalign = cl_width * 8; + image->htile.yalign = cl_height * 8; + + return image->array_size * + align(slice_bytes, base_align); } static void radv_image_alloc_htile(struct radv_device *device, struct radv_image *image) { - if ((device->debug_flags & RADV_DEBUG_NO_HIZ) || image->levels > 1) { - image->surface.htile_size = 0; + if (env_var_as_boolean("RADV_HIZ_DISABLE", false)) + return; + + image->htile.size = radv_image_get_htile_size(device, image); + + if (!image->htile.size) return; - } - image->htile_offset = align64(image->size, image->surface.htile_alignment); + image->htile.offset = align64(image->size, 32768); /* + 8 for storing the clear values */ - image->clear_value_offset = image->htile_offset + image->surface.htile_size; - image->size = image->clear_value_offset + 8; - image->alignment = align64(image->alignment, image->surface.htile_alignment); + image->clear_value_offset = image->htile.offset + image->htile.size; + image->size = image->htile.offset + image->htile.size + 8; + image->alignment = align64(image->alignment, 32768); } VkResult @@ -625,7 +678,7 @@ radv_image_create(VkDevice _device, RADV_FROM_HANDLE(radv_device, device, _device); const VkImageCreateInfo 
*pCreateInfo = create_info->vk_info; struct radv_image *image = NULL; - bool can_cmask_dcc = false; + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); radv_assert(pCreateInfo->mipLevels > 0); @@ -649,14 +702,6 @@ radv_image_create(VkDevice _device, image->samples = pCreateInfo->samples; image->tiling = pCreateInfo->tiling; image->usage = pCreateInfo->usage; - image->flags = pCreateInfo->flags; - - image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE; - if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) { - for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i) - image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i]; - } - radv_init_surface(device, &image->surface, create_info); device->ws->surface_init(device->ws, &image->surface); @@ -664,18 +709,15 @@ radv_image_create(VkDevice _device, image->size = image->surface.bo_size; image->alignment = image->surface.bo_alignment; - if (image->exclusive || image->queue_family_mask == 1) - can_cmask_dcc = true; - if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && - image->surface.dcc_size && can_cmask_dcc) + image->surface.dcc_size) radv_image_alloc_dcc(device, image); else image->surface.dcc_size = 0; if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && pCreateInfo->mipLevels == 1 && - !image->surface.dcc_size && image->extent.depth == 1 && can_cmask_dcc) + !image->surface.dcc_size && image->extent.depth == 1) radv_image_alloc_cmask(device, image); if (image->samples > 1 && vk_format_is_color(pCreateInfo->format)) { radv_image_alloc_fmask(device, image); @@ -690,20 +732,6 @@ radv_image_create(VkDevice _device, image->surface.level[0].pitch_bytes = create_info->stride; image->surface.level[0].slice_size = create_info->stride * image->surface.level[0].nblk_y; } - - if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) { - image->alignment = MAX2(image->alignment, 4096); - image->size = align64(image->size, image->alignment); - image->offset = 0; - - image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment, - 0, RADEON_FLAG_VIRTUAL); - if (!image->bo) { - vk_free2(&device->alloc, alloc, image); - return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - } - } - *pImage = radv_image_to_handle(image); return VK_SUCCESS; @@ -718,7 +746,6 @@ radv_image_view_init(struct radv_image_view *iview, { RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - uint32_t blk_w; bool is_stencil = false; switch (image->type) { case VK_IMAGE_TYPE_1D: @@ -756,8 +783,6 @@ radv_image_view_init(struct radv_image_view *iview, iview->extent.height = round_up_u32(iview->extent.height * vk_format_get_blockheight(iview->vk_format), vk_format_get_blockheight(image->vk_format)); - assert(image->surface.blk_w % vk_format_get_blockwidth(image->vk_format) == 0); - blk_w = image->surface.blk_w / vk_format_get_blockwidth(image->vk_format) * vk_format_get_blockwidth(iview->vk_format); iview->base_layer = range->baseArrayLayer; iview->layer_count = radv_get_layerCount(image, range); iview->base_mip = range->baseMipLevel; @@ -777,7 +802,7 @@ radv_image_view_init(struct radv_image_view *iview, si_set_mutable_tex_desc_fields(device, image, is_stencil ? 
&image->surface.stencil_level[range->baseMipLevel] : &image->surface.level[range->baseMipLevel], range->baseMipLevel, range->baseMipLevel, - blk_w, is_stencil, iview->descriptor); + image->surface.blk_w, is_stencil, iview->descriptor); } void radv_image_set_optimal_micro_tile_mode(struct radv_device *device, @@ -787,7 +812,7 @@ void radv_image_set_optimal_micro_tile_mode(struct radv_device *device, * definitions for them either. They are all 2D_TILED_THIN1 modes with * different bpp and micro tile mode. */ - if (device->physical_device->rad_info.chip_class >= CIK) { + if (device->instance->physicalDevice.rad_info.chip_class >= CIK) { switch (micro_tile_mode) { case 0: /* displayable */ image->surface.tiling_index[0] = 10; @@ -862,22 +887,11 @@ bool radv_layout_can_expclear(const struct radv_image *image, layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); } -bool radv_layout_can_fast_clear(const struct radv_image *image, - VkImageLayout layout, - unsigned queue_mask) +bool radv_layout_has_cmask(const struct radv_image *image, + VkImageLayout layout) { - return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && - queue_mask == (1u << RADV_QUEUE_GENERAL); -} - - -unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family) -{ - if (!image->exclusive) - return image->queue_family_mask; - if (family == VK_QUEUE_FAMILY_IGNORED) - return 1u << queue_family; - return 1u << family; + return (layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL || + layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); } VkResult @@ -900,15 +914,11 @@ radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator) { RADV_FROM_HANDLE(radv_device, device, _device); - RADV_FROM_HANDLE(radv_image, image, _image); - if (!image) + if (!_image) return; - if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) - device->ws->buffer_destroy(image->bo); - - vk_free2(&device->alloc, pAllocator, image); + vk_free2(&device->alloc, pAllocator, radv_image_from_handle(_image)); } void radv_GetImageSubresourceLayout( diff --git a/lib/mesa/src/amd/vulkan/radv_meta.c b/lib/mesa/src/amd/vulkan/radv_meta.c index fac0dcf6d..04fa247dd 100644 --- a/lib/mesa/src/amd/vulkan/radv_meta.c +++ b/lib/mesa/src/amd/vulkan/radv_meta.c @@ -51,10 +51,8 @@ void radv_meta_restore(const struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer) { - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, - radv_pipeline_to_handle(state->old_pipeline)); - - cmd_buffer->state.descriptors[0] = state->old_descriptor_set0; + cmd_buffer->state.pipeline = state->old_pipeline; + radv_bind_descriptor_set(cmd_buffer, state->old_descriptor_set0, 0); memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings, sizeof(state->old_vertex_bindings)); @@ -112,8 +110,7 @@ radv_meta_restore_compute(const struct radv_meta_saved_compute_state *state, { radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, radv_pipeline_to_handle(state->old_pipeline)); - - cmd_buffer->state.descriptors[0] = state->old_descriptor_set0; + radv_bind_descriptor_set(cmd_buffer, state->old_descriptor_set0, 0); if (push_constant_size) { memcpy(cmd_buffer->push_constants, state->push_constants, push_constant_size); @@ -327,10 +324,6 @@ radv_device_init_meta(struct radv_device *device) if (result != VK_SUCCESS) goto fail_buffer; - result = radv_device_init_meta_query_state(device); - if (result != VK_SUCCESS) - goto fail_query; - result 
= radv_device_init_meta_fast_clear_flush_state(device); if (result != VK_SUCCESS) goto fail_fast_clear; @@ -344,8 +337,6 @@ fail_resolve_compute: radv_device_finish_meta_fast_clear_flush_state(device); fail_fast_clear: radv_device_finish_meta_buffer_state(device); -fail_query: - radv_device_finish_meta_query_state(device); fail_buffer: radv_device_finish_meta_depth_decomp_state(device); fail_depth_decomp: @@ -372,7 +363,6 @@ radv_device_finish_meta(struct radv_device *device) radv_device_finish_meta_blit2d_state(device); radv_device_finish_meta_bufimage_state(device); radv_device_finish_meta_depth_decomp_state(device); - radv_device_finish_meta_query_state(device); radv_device_finish_meta_buffer_state(device); radv_device_finish_meta_fast_clear_flush_state(device); radv_device_finish_meta_resolve_compute_state(device); diff --git a/lib/mesa/src/amd/vulkan/radv_meta.h b/lib/mesa/src/amd/vulkan/radv_meta.h index 6cfc6134c..97d020cea 100644 --- a/lib/mesa/src/amd/vulkan/radv_meta.h +++ b/lib/mesa/src/amd/vulkan/radv_meta.h @@ -85,9 +85,6 @@ void radv_device_finish_meta_blit2d_state(struct radv_device *device); VkResult radv_device_init_meta_buffer_state(struct radv_device *device); void radv_device_finish_meta_buffer_state(struct radv_device *device); -VkResult radv_device_init_meta_query_state(struct radv_device *device); -void radv_device_finish_meta_query_state(struct radv_device *device); - VkResult radv_device_init_meta_resolve_compute_state(struct radv_device *device); void radv_device_finish_meta_resolve_compute_state(struct radv_device *device); void radv_meta_save(struct radv_meta_saved_state *state, @@ -162,34 +159,13 @@ void radv_meta_begin_bufimage(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_saved_compute_state *save); void radv_meta_end_bufimage(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_saved_compute_state *save); -void radv_meta_begin_itoi(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_saved_compute_state *save); -void radv_meta_end_itoi(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_saved_compute_state *save); -void radv_meta_begin_cleari(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_saved_compute_state *save); -void radv_meta_end_cleari(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_saved_compute_state *save); + void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src, struct radv_meta_blit2d_buffer *dst, unsigned num_rects, struct radv_meta_blit2d_rect *rects); -void radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_buffer *src, - struct radv_meta_blit2d_surf *dst, - unsigned num_rects, - struct radv_meta_blit2d_rect *rects); -void radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_surf *src, - struct radv_meta_blit2d_surf *dst, - unsigned num_rects, - struct radv_meta_blit2d_rect *rects); -void radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_surf *dst, - const VkClearColorValue *clear_color); - void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageSubresourceRange *subresourceRange); @@ -197,8 +173,7 @@ void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageSubresourceRange *subresourceRange); void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - const VkImageSubresourceRange *subresourceRange); + 
struct radv_image *image); void radv_meta_save_graphics_reset_vport_scissor(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer *cmd_buffer); @@ -211,9 +186,6 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, uint32_t region_count, const VkImageResolve *regions); -void radv_blit_to_prime_linear(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - struct radv_image *linear_image); #ifdef __cplusplus } #endif diff --git a/lib/mesa/src/amd/vulkan/radv_meta_blit.c b/lib/mesa/src/amd/vulkan/radv_meta_blit.c index a3256ab05..dfba8a897 100644 --- a/lib/mesa/src/amd/vulkan/radv_meta_blit.c +++ b/lib/mesa/src/amd/vulkan/radv_meta_blit.c @@ -38,7 +38,7 @@ build_nir_vertex_shader(void) nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "meta_blit_vs"); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs"); nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "a_pos"); @@ -70,7 +70,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); sprintf(shader_name, "meta_blit_fs.%d", tex_dim); - b.shader->info->name = ralloc_strdup(b.shader, shader_name); + b.shader->info.name = ralloc_strdup(b.shader, shader_name); nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos"); @@ -124,7 +124,7 @@ build_nir_copy_fragment_shader_depth(enum glsl_sampler_dim tex_dim) nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); sprintf(shader_name, "meta_blit_depth_fs.%d", tex_dim); - b.shader->info->name = ralloc_strdup(b.shader, shader_name); + b.shader->info.name = ralloc_strdup(b.shader, shader_name); nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos"); @@ -178,7 +178,7 @@ build_nir_copy_fragment_shader_stencil(enum glsl_sampler_dim tex_dim) nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); sprintf(shader_name, "meta_blit_stencil_fs.%d", tex_dim); - b.shader->info->name = ralloc_strdup(b.shader, shader_name); + b.shader->info.name = ralloc_strdup(b.shader, shader_name); nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos"); @@ -226,13 +226,12 @@ static void meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, struct radv_image_view *src_iview, - VkOffset3D src_offset_0, - VkOffset3D src_offset_1, + VkOffset3D src_offset, + VkExtent3D src_extent, struct radv_image *dest_image, struct radv_image_view *dest_iview, - VkOffset3D dest_offset_0, - VkOffset3D dest_offset_1, - VkRect2D dest_box, + VkOffset3D dest_offset, + VkExtent3D dest_extent, VkFilter blit_filter) { struct radv_device *device = cmd_buffer->device; @@ -246,37 +245,38 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, unsigned vb_size = 3 * sizeof(*vb_data); vb_data[0] = (struct blit_vb_data) { .pos = { - -1.0, - -1.0, + dest_offset.x, + dest_offset.y, }, .tex_coord = { - (float)src_offset_0.x / (float)src_iview->extent.width, - (float)src_offset_0.y / (float)src_iview->extent.height, - (float)src_offset_0.z / (float)src_iview->extent.depth, + (float)(src_offset.x) / (float)src_iview->extent.width, + (float)(src_offset.y) / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, }, }; vb_data[1] = (struct blit_vb_data) { .pos = { - -1.0, - 1.0, + dest_offset.x, + dest_offset.y + dest_extent.height, 
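
In the meta_emit_blit() rewrite above, the vertex positions change from fixed NDC corners (-1.0/1.0) to destination pixel coordinates built from dest_offset and dest_extent, while the texture coordinates stay normalized against the source image view's dimensions. The normalization is a plain divide, sketched here for clarity:

    /* Texcoord normalization as in the "+" lines above: a texel offset is
     * mapped into [0,1] by the source view's extent. */
    static inline float normalized_coord(int32_t texel, uint32_t view_extent)
    {
            return (float)texel / (float)view_extent;
    }
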
}, .tex_coord = { - (float)src_offset_0.x / (float)src_iview->extent.width, - (float)src_offset_1.y / (float)src_iview->extent.height, - (float)src_offset_0.z / (float)src_iview->extent.depth, + (float)src_offset.x / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / + (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, }, }; vb_data[2] = (struct blit_vb_data) { .pos = { - 1.0, - -1.0, + dest_offset.x + dest_extent.width, + dest_offset.y, }, .tex_coord = { - (float)src_offset_1.x / (float)src_iview->extent.width, - (float)src_offset_0.y / (float)src_iview->extent.height, - (float)src_offset_0.z / (float)src_iview->extent.depth, + (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width, + (float)src_offset.y / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, }, }; radv_cmd_buffer_upload_data(cmd_buffer, vb_size, 16, vb_data, &offset); @@ -307,6 +307,31 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, }, &cmd_buffer->pool->alloc, &sampler); + VkDescriptorSet set; + radv_temp_descriptor_set_create(cmd_buffer->device, cmd_buffer, + device->meta_state.blit.ds_layout, + &set); + + radv_UpdateDescriptorSets(radv_device_to_handle(device), + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = sampler, + .imageView = radv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + VkFramebuffer fb; radv_CreateFramebuffer(radv_device_to_handle(device), &(VkFramebufferCreateInfo) { @@ -330,8 +355,8 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, .renderPass = device->meta_state.blit.render_pass[fs_key], .framebuffer = fb, .renderArea = { - .offset = { dest_box.offset.x, dest_box.offset.y }, - .extent = { dest_box.extent.width, dest_box.extent.height }, + .offset = { dest_offset.x, dest_offset.y }, + .extent = { dest_extent.width, dest_extent.height }, }, .clearValueCount = 0, .pClearValues = NULL, @@ -358,8 +383,8 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, .renderPass = device->meta_state.blit.depth_only_rp, .framebuffer = fb, .renderArea = { - .offset = { dest_box.offset.x, dest_box.offset.y }, - .extent = { dest_box.extent.width, dest_box.extent.height }, + .offset = { dest_offset.x, dest_offset.y }, + .extent = { dest_extent.width, dest_extent.height }, }, .clearValueCount = 0, .pClearValues = NULL, @@ -385,9 +410,9 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, .renderPass = device->meta_state.blit.stencil_only_rp, .framebuffer = fb, .renderArea = { - .offset = { dest_box.offset.x, dest_box.offset.y }, - .extent = { dest_box.extent.width, dest_box.extent.height }, - }, + .offset = { dest_offset.x, dest_offset.y }, + .extent = { dest_extent.width, dest_extent.height }, + }, .clearValueCount = 0, .pClearValues = NULL, }, VK_SUBPASS_CONTENTS_INLINE); @@ -414,43 +439,10 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } - radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit.pipeline_layout, - 0, /* set */ - 1, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]) { - { - .sType = 
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = sampler, - .imageView = radv_image_view_to_handle(src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - } - }); - - radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { - .x = dest_offset_0.x, - .y = dest_offset_0.y, - .width = dest_offset_1.x - dest_offset_0.x, - .height = dest_offset_1.y - dest_offset_0.y, - .minDepth = 0.0f, - .maxDepth = 1.0f - }); - - radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) { - .offset = (VkOffset2D) { MIN2(dest_offset_0.x, dest_offset_1.x), MIN2(dest_offset_0.y, dest_offset_1.y) }, - .extent = (VkExtent2D) { - abs(dest_offset_1.x - dest_offset_0.x), - abs(dest_offset_1.y - dest_offset_0.y) - }, - }); + radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit.pipeline_layout, 0, 1, + &set, 0, NULL); radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); @@ -462,32 +454,13 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, /* TODO: above comment is not valid for at least descriptor sets/pools, * as we may not free them till after execution finishes. Check others. */ + radv_temp_descriptor_set_destroy(cmd_buffer->device, set); radv_DestroySampler(radv_device_to_handle(device), sampler, &cmd_buffer->pool->alloc); radv_DestroyFramebuffer(radv_device_to_handle(device), fb, &cmd_buffer->pool->alloc); } -static bool -flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1) -{ - bool flip = false; - if (*src0 > *src1) { - unsigned tmp = *src0; - *src0 = *src1; - *src1 = tmp; - flip = !flip; - } - - if (*dst0 > *dst1) { - unsigned tmp = *dst0; - *dst0 = *dst1; - *dst1 = tmp; - flip = !flip; - } - return flip; -} - void radv_CmdBlitImage( VkCommandBuffer commandBuffer, VkImage srcImage, @@ -515,8 +488,6 @@ void radv_CmdBlitImage( radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer); for (unsigned r = 0; r < regionCount; r++) { - const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource; - const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource; struct radv_image_view src_iview; radv_image_view_init(&src_iview, cmd_buffer->device, &(VkImageViewCreateInfo) { @@ -525,92 +496,59 @@ void radv_CmdBlitImage( .viewType = radv_meta_get_view_type(src_image), .format = src_image->vk_format, .subresourceRange = { - .aspectMask = src_res->aspectMask, - .baseMipLevel = src_res->mipLevel, + .aspectMask = pRegions[r].srcSubresource.aspectMask, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, .levelCount = 1, - .baseArrayLayer = src_res->baseArrayLayer, + .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, .layerCount = 1 }, }, cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT); - unsigned dst_start, dst_end; - if (dest_image->type == VK_IMAGE_TYPE_3D) { - assert(dst_res->baseArrayLayer == 0); - dst_start = pRegions[r].dstOffsets[0].z; - dst_end = pRegions[r].dstOffsets[1].z; - } else { - dst_start = dst_res->baseArrayLayer; - dst_end = dst_start + dst_res->layerCount; - } + if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x || + pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y || + pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x || + pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y) + radv_finishme("FINISHME: 
Allow flipping in blits"); - unsigned src_start, src_end; - if (src_image->type == VK_IMAGE_TYPE_3D) { - assert(src_res->baseArrayLayer == 0); - src_start = pRegions[r].srcOffsets[0].z; - src_end = pRegions[r].srcOffsets[1].z; - } else { - src_start = src_res->baseArrayLayer; - src_end = src_start + src_res->layerCount; - } - - bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end); - float src_z_step = (float)(src_end + 1 - src_start) / - (float)(dst_end + 1 - dst_start); - - if (flip_z) { - src_start = src_end; - src_z_step *= -1; - } + const VkExtent3D dest_extent = { + .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x, + .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y, + .depth = 1, + }; - unsigned src_x0 = pRegions[r].srcOffsets[0].x; - unsigned src_x1 = pRegions[r].srcOffsets[1].x; - unsigned dst_x0 = pRegions[r].dstOffsets[0].x; - unsigned dst_x1 = pRegions[r].dstOffsets[1].x; + const VkExtent3D src_extent = { + .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x, + .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y, + .depth = pRegions[r].srcOffsets[1].z - pRegions[r].srcOffsets[0].z, + }; - unsigned src_y0 = pRegions[r].srcOffsets[0].y; - unsigned src_y1 = pRegions[r].srcOffsets[1].y; - unsigned dst_y0 = pRegions[r].dstOffsets[0].y; - unsigned dst_y1 = pRegions[r].dstOffsets[1].y; - VkRect2D dest_box; - dest_box.offset.x = MIN2(dst_x0, dst_x1); - dest_box.offset.y = MIN2(dst_y0, dst_y1); - dest_box.extent.width = abs(dst_x1 - dst_x0); - dest_box.extent.height = abs(dst_y1 - dst_y0); + if (pRegions[r].srcSubresource.layerCount > 1) + radv_finishme("FINISHME: copy multiple array layers"); struct radv_image_view dest_iview; unsigned usage; - if (dst_res->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) + if (pRegions[r].dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; else usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; - const unsigned num_layers = dst_end - dst_start; - for (unsigned i = 0; i < num_layers; i++) { - const VkOffset3D dest_offset_0 = { - .x = dst_x0, - .y = dst_y0, - .z = dst_start + i , - }; - const VkOffset3D dest_offset_1 = { - .x = dst_x1, - .y = dst_y1, - .z = dst_start + i , - }; - VkOffset3D src_offset_0 = { - .x = src_x0, - .y = src_y0, - .z = src_start + i * src_z_step, + for (unsigned i = pRegions[r].dstOffsets[0].z; i < pRegions[r].dstOffsets[1].z; i++) { + + const VkOffset3D dest_offset = { + .x = pRegions[r].dstOffsets[0].x, + .y = pRegions[r].dstOffsets[0].y, + .z = i, }; - VkOffset3D src_offset_1 = { - .x = src_x1, - .y = src_y1, - .z = src_start + i * src_z_step, + VkOffset3D src_offset = { + .x = pRegions[r].srcOffsets[0].x, + .y = pRegions[r].srcOffsets[0].y, + .z = i, }; const uint32_t dest_array_slice = - radv_meta_get_iview_layer(dest_image, dst_res, - &dest_offset_0); + radv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, + &dest_offset); radv_image_view_init(&dest_iview, cmd_buffer->device, &(VkImageViewCreateInfo) { @@ -619,8 +557,8 @@ void radv_CmdBlitImage( .viewType = radv_meta_get_view_type(dest_image), .format = dest_image->vk_format, .subresourceRange = { - .aspectMask = dst_res->aspectMask, - .baseMipLevel = dst_res->mipLevel, + .aspectMask = pRegions[r].dstSubresource.aspectMask, + .baseMipLevel = pRegions[r].dstSubresource.mipLevel, .levelCount = 1, .baseArrayLayer = dest_array_slice, .layerCount = 1 @@ -629,10 +567,9 @@ void radv_CmdBlitImage( cmd_buffer, usage); meta_emit_blit(cmd_buffer, 
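
VkImageBlit regions are specified as two corner offsets per image. The 13.0.2 code above derives extents by straight subtraction (with the destination depth forced to 1) and only warns via radv_finishme() when offsets[1] < offsets[0], i.e. flipped blits are not yet handled. A sketch of the derivation, assuming non-flipped offsets:

    /* Extent derivation as used above; assumes o1 >= o0 on each axis, which
     * the surrounding code only warns about rather than handling. */
    static VkExtent3D extent_from_offsets(VkOffset3D o0, VkOffset3D o1)
    {
            return (VkExtent3D) {
                    .width  = o1.x - o0.x,
                    .height = o1.y - o0.y,
                    .depth  = o1.z - o0.z,
            };
    }
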
src_image, &src_iview, - src_offset_0, src_offset_1, + src_offset, src_extent, dest_image, &dest_iview, - dest_offset_0, dest_offset_1, - dest_box, + dest_offset, dest_extent, filter); } } @@ -820,8 +757,8 @@ radv_device_init_meta_blit_color(struct radv_device *device, }, .pViewportState = &(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, + .viewportCount = 0, + .scissorCount = 0, }, .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, @@ -849,10 +786,8 @@ radv_device_init_meta_blit_color(struct radv_device *device, }, .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 4, + .dynamicStateCount = 2, .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_LINE_WIDTH, VK_DYNAMIC_STATE_BLEND_CONSTANTS, }, @@ -999,8 +934,8 @@ radv_device_init_meta_blit_depth(struct radv_device *device, }, .pViewportState = &(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, + .viewportCount = 0, + .scissorCount = 0, }, .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, @@ -1028,10 +963,8 @@ radv_device_init_meta_blit_depth(struct radv_device *device, }, .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 9, + .dynamicStateCount = 7, .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_LINE_WIDTH, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, @@ -1180,8 +1113,8 @@ radv_device_init_meta_blit_stencil(struct radv_device *device, }, .pViewportState = &(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, + .viewportCount = 0, + .scissorCount = 0, }, .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, @@ -1229,10 +1162,8 @@ radv_device_init_meta_blit_stencil(struct radv_device *device, .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 6, + .dynamicStateCount = 4, .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_LINE_WIDTH, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, @@ -1289,7 +1220,6 @@ radv_device_init_meta_blit_state(struct radv_device *device) VkDescriptorSetLayoutCreateInfo ds_layout_info = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, .bindingCount = 1, .pBindings = (VkDescriptorSetLayoutBinding[]) { { diff --git a/lib/mesa/src/amd/vulkan/radv_meta_blit2d.c b/lib/mesa/src/amd/vulkan/radv_meta_blit2d.c index f69fec8ea..52e142f68 100644 --- a/lib/mesa/src/amd/vulkan/radv_meta_blit2d.c +++ b/lib/mesa/src/amd/vulkan/radv_meta_blit2d.c @@ -26,7 +26,6 @@ #include "radv_meta.h" #include "nir/nir_builder.h" -#include "vk_format.h" enum blit2d_dst_type { /* We can bind this destination as a "normal" render target and render @@ -104,6 
+103,8 @@ create_bview(struct radv_cmd_buffer *cmd_buffer, struct blit2d_src_temps { struct radv_image_view iview; + + VkDescriptorSet set; struct radv_buffer_view bview; }; @@ -111,28 +112,33 @@ static void blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img, struct radv_meta_blit2d_buffer *src_buf, + struct radv_meta_blit2d_rect *rect, struct blit2d_src_temps *tmp, enum blit2d_src_type src_type, VkFormat depth_format) { struct radv_device *device = cmd_buffer->device; + VkDevice vk_device = radv_device_to_handle(cmd_buffer->device); if (src_type == BLIT2D_SRC_TYPE_BUFFER) { create_bview(cmd_buffer, src_buf, &tmp->bview, depth_format); - radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit2d.p_layouts[src_type], - 0, /* set */ - 1, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, - .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->bview) } - } - }); + radv_temp_descriptor_set_create(cmd_buffer->device, cmd_buffer, + device->meta_state.blit2d.ds_layouts[src_type], + &tmp->set); + + radv_UpdateDescriptorSets(vk_device, + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = tmp->set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->bview) } + } + }, 0, NULL); radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.blit2d.p_layouts[src_type], @@ -142,27 +148,44 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer, create_iview(cmd_buffer, src_img, VK_IMAGE_USAGE_SAMPLED_BIT, &tmp->iview, depth_format); - radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit2d.p_layouts[src_type], - 0, /* set */ - 1, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(&tmp->iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - } - }); + radv_temp_descriptor_set_create(cmd_buffer->device, cmd_buffer, + device->meta_state.blit2d.ds_layouts[src_type], + &tmp->set); + + radv_UpdateDescriptorSets(vk_device, + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = tmp->set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = NULL, + .imageView = radv_image_view_to_handle(&tmp->iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + } + + radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit2d.p_layouts[src_type], 0, 1, + &tmp->set, 0, NULL); +} + +static void +blit2d_unbind_src(struct radv_cmd_buffer *cmd_buffer, + struct blit2d_src_temps *tmp, + enum blit2d_src_type src_type) +{ + radv_temp_descriptor_set_destroy(cmd_buffer->device, tmp->set); } struct 
blit2d_dst_temps { @@ -261,12 +284,10 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, for (unsigned r = 0; r < num_rects; ++r) { VkFormat depth_format = 0; - if (dst->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) - depth_format = vk_format_stencil_only(dst->image->vk_format); - else if (dst->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) - depth_format = vk_format_depth_only(dst->image->vk_format); + if (dst->aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT) + depth_format = dst->image->vk_format; struct blit2d_src_temps src_temps; - blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format); + blit2d_bind_src(cmd_buffer, src_img, src_buf, &rects[r], &src_temps, src_type, depth_format); uint32_t offset = 0; struct blit2d_dst_temps dst_temps; @@ -282,8 +303,8 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, vb_data[0] = (struct blit_vb_data) { .pos = { - -1.0, - -1.0, + rects[r].dst_x, + rects[r].dst_y, }, .tex_coord = { rects[r].src_x, @@ -293,8 +314,8 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, vb_data[1] = (struct blit_vb_data) { .pos = { - -1.0, - 1.0, + rects[r].dst_x, + rects[r].dst_y + rects[r].height, }, .tex_coord = { rects[r].src_x, @@ -304,8 +325,8 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, vb_data[2] = (struct blit_vb_data) { .pos = { - 1.0, - -1.0, + rects[r].dst_x + rects[r].width, + rects[r].dst_y, }, .tex_coord = { rects[r].src_x + rects[r].width, @@ -385,28 +406,13 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, bind_stencil_pipeline(cmd_buffer, src_type); } - radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { - .x = rects[r].dst_x, - .y = rects[r].dst_y, - .width = rects[r].width, - .height = rects[r].height, - .minDepth = 0.0f, - .maxDepth = 1.0f - }); - - radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) { - .offset = (VkOffset2D) { rects[r].dst_x, rects[r].dst_y }, - .extent = (VkExtent2D) { rects[r].width, rects[r].height }, - }); - - - radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer)); /* At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. 
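
The TODO above is about exactly this point: the temporary descriptor set created in blit2d_bind_src() is destroyed as soon as the draw has been recorded, not when it has executed. The lifecycle introduced by this change, as a compact sketch using the helpers from the hunks above (error handling omitted):

    /* Bind/draw/unbind lifecycle from the hunks above; the destroy happens
     * at record time, which the TODO flags as possibly too early. */
    static void draw_with_temp_set(struct radv_cmd_buffer *cmd_buffer,
                                   struct radv_device *device,
                                   VkDescriptorSetLayout layout)
    {
            VkDescriptorSet set;

            radv_temp_descriptor_set_create(device, cmd_buffer, layout, &set);
            /* ... radv_UpdateDescriptorSets() and radv_CmdBindDescriptorSets()
             * go here, as in blit2d_bind_src() ... */
            radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
            radv_temp_descriptor_set_destroy(device, set);
    }
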
*/ + blit2d_unbind_src(cmd_buffer, &src_temps, src_type); blit2d_unbind_dst(cmd_buffer, &dst_temps); } } @@ -433,7 +439,7 @@ build_nir_vertex_shader(void) nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "meta_blit_vs"); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs"); nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "a_pos"); @@ -568,7 +574,7 @@ build_nir_copy_fragment_shader(struct radv_device *device, nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info->name = ralloc_strdup(b.shader, name); + b.shader->info.name = ralloc_strdup(b.shader, name); nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "v_tex_pos"); @@ -578,7 +584,7 @@ build_nir_copy_fragment_shader(struct radv_device *device, vec4, "f_color"); color_out->data.location = FRAG_RESULT_DATA0; - nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in)); + nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); unsigned swiz[4] = { 0, 1 }; nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false); @@ -597,7 +603,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device, nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info->name = ralloc_strdup(b.shader, name); + b.shader->info.name = ralloc_strdup(b.shader, name); nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "v_tex_pos"); @@ -607,7 +613,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device, vec4, "f_color"); color_out->data.location = FRAG_RESULT_DEPTH; - nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in)); + nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); unsigned swiz[4] = { 0, 1 }; nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false); @@ -626,7 +632,7 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *device, nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info->name = ralloc_strdup(b.shader, name); + b.shader->info.name = ralloc_strdup(b.shader, name); nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "v_tex_pos"); @@ -636,7 +642,7 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *device, vec4, "f_color"); color_out->data.location = FRAG_RESULT_STENCIL; - nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in)); + nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); unsigned swiz[4] = { 0, 1 }; nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false); @@ -790,8 +796,8 @@ blit2d_init_color_pipeline(struct radv_device *device, }, .pViewportState = &(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, + .viewportCount = 0, + .scissorCount = 0, }, .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, @@ -819,10 +825,8 @@ blit2d_init_color_pipeline(struct radv_device *device, }, .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 9, + .dynamicStateCount = 7, .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_LINE_WIDTH, VK_DYNAMIC_STATE_DEPTH_BIAS, 
VK_DYNAMIC_STATE_BLEND_CONSTANTS, @@ -945,8 +949,8 @@ blit2d_init_depth_only_pipeline(struct radv_device *device, }, .pViewportState = &(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, + .viewportCount = 0, + .scissorCount = 0, }, .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, @@ -974,10 +978,8 @@ blit2d_init_depth_only_pipeline(struct radv_device *device, }, .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 9, + .dynamicStateCount = 7, .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_LINE_WIDTH, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, @@ -1100,8 +1102,8 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device, }, .pViewportState = &(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, + .viewportCount = 0, + .scissorCount = 0, }, .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, @@ -1148,10 +1150,8 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device, }, .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 6, + .dynamicStateCount = 4, .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_LINE_WIDTH, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, @@ -1204,7 +1204,6 @@ radv_device_init_meta_blit2d_state(struct radv_device *device) result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &(VkDescriptorSetLayoutCreateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, .bindingCount = 1, .pBindings = (VkDescriptorSetLayoutBinding[]) { { @@ -1232,7 +1231,6 @@ radv_device_init_meta_blit2d_state(struct radv_device *device) result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &(VkDescriptorSetLayoutCreateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, .bindingCount = 1, .pBindings = (VkDescriptorSetLayoutBinding[]) { { diff --git a/lib/mesa/src/amd/vulkan/radv_meta_buffer.c b/lib/mesa/src/amd/vulkan/radv_meta_buffer.c index 0bb926fa9..adea25e02 100644 --- a/lib/mesa/src/amd/vulkan/radv_meta_buffer.c +++ b/lib/mesa/src/amd/vulkan/radv_meta_buffer.c @@ -10,17 +10,17 @@ build_buffer_fill_shader(struct radv_device *dev) nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_fill"); - b.shader->info->cs.local_size[0] = 64; - b.shader->info->cs.local_size[1] = 1; - b.shader->info->cs.local_size[2] = 1; + b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill"); + b.shader->info.cs.local_size[0] = 64; + b.shader->info.cs.local_size[1] = 1; + b.shader->info.cs.local_size[2] = 1; nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); nir_ssa_def *block_size = 
nir_imm_ivec4(&b, - b.shader->info->cs.local_size[0], - b.shader->info->cs.local_size[1], - b.shader->info->cs.local_size[2], 0); + b.shader->info.cs.local_size[0], + b.shader->info.cs.local_size[1], + b.shader->info.cs.local_size[2], 0); nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); @@ -60,17 +60,17 @@ build_buffer_copy_shader(struct radv_device *dev) nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_copy"); - b.shader->info->cs.local_size[0] = 64; - b.shader->info->cs.local_size[1] = 1; - b.shader->info->cs.local_size[2] = 1; + b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy"); + b.shader->info.cs.local_size[0] = 64; + b.shader->info.cs.local_size[1] = 1; + b.shader->info.cs.local_size[2] = 1; nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); nir_ssa_def *block_size = nir_imm_ivec4(&b, - b.shader->info->cs.local_size[0], - b.shader->info->cs.local_size[1], - b.shader->info->cs.local_size[2], 0); + b.shader->info.cs.local_size[0], + b.shader->info.cs.local_size[1], + b.shader->info.cs.local_size[2], 0); nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); @@ -126,7 +126,6 @@ VkResult radv_device_init_meta_buffer_state(struct radv_device *device) VkDescriptorSetLayoutCreateInfo fill_ds_create_info = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, .bindingCount = 1, .pBindings = (VkDescriptorSetLayoutBinding[]) { { @@ -148,7 +147,6 @@ VkResult radv_device_init_meta_buffer_state(struct radv_device *device) VkDescriptorSetLayoutCreateInfo copy_ds_create_info = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, .bindingCount = 2, .pBindings = (VkDescriptorSetLayoutBinding[]) { { @@ -299,37 +297,46 @@ static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_device *device = cmd_buffer->device; uint64_t block_count = round_up_u64(size, 1024); struct radv_meta_saved_compute_state saved_state; + VkDescriptorSet ds; radv_meta_save_compute(&saved_state, cmd_buffer, 4); + radv_temp_descriptor_set_create(device, cmd_buffer, + device->meta_state.buffer.fill_ds_layout, + &ds); + struct radv_buffer dst_buffer = { .bo = bo, .offset = offset, .size = size }; + radv_UpdateDescriptorSets(radv_device_to_handle(device), + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = ds, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = &(VkDescriptorBufferInfo) { + .buffer = radv_buffer_to_handle(&dst_buffer), + .offset = 0, + .range = size + } + } + }, 0, NULL); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.buffer.fill_pipeline); - radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - device->meta_state.buffer.fill_p_layout, - 0, /* set */ - 1, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .pBufferInfo = 
&(VkDescriptorBufferInfo) { - .buffer = radv_buffer_to_handle(&dst_buffer), - .offset = 0, - .range = size - } - } - }); + radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_COMPUTE, + device->meta_state.buffer.fill_p_layout, 0, 1, + &ds, 0, NULL); radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.buffer.fill_p_layout, @@ -338,6 +345,8 @@ static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer, radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1); + radv_temp_descriptor_set_destroy(device, ds); + radv_meta_restore_compute(&saved_state, cmd_buffer, 4); } @@ -350,9 +359,14 @@ static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_device *device = cmd_buffer->device; uint64_t block_count = round_up_u64(size, 1024); struct radv_meta_saved_compute_state saved_state; + VkDescriptorSet ds; radv_meta_save_compute(&saved_state, cmd_buffer, 0); + radv_temp_descriptor_set_create(device, cmd_buffer, + device->meta_state.buffer.copy_ds_layout, + &ds); + struct radv_buffer dst_buffer = { .bo = dst_bo, .offset = dst_offset, @@ -365,43 +379,51 @@ static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer, .size = size }; + radv_UpdateDescriptorSets(radv_device_to_handle(device), + 2, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = ds, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = &(VkDescriptorBufferInfo) { + .buffer = radv_buffer_to_handle(&dst_buffer), + .offset = 0, + .range = size + } + }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = ds, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = &(VkDescriptorBufferInfo) { + .buffer = radv_buffer_to_handle(&src_buffer), + .offset = 0, + .range = size + } + } + }, 0, NULL); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.buffer.copy_pipeline); - radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - device->meta_state.buffer.copy_p_layout, - 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .pBufferInfo = &(VkDescriptorBufferInfo) { - .buffer = radv_buffer_to_handle(&dst_buffer), - .offset = 0, - .range = size - } - }, - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .pBufferInfo = &(VkDescriptorBufferInfo) { - .buffer = radv_buffer_to_handle(&src_buffer), - .offset = 0, - .range = size - } - } - }); + radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_COMPUTE, + device->meta_state.buffer.copy_p_layout, 0, 1, + &ds, 0, NULL); + radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1); + radv_temp_descriptor_set_destroy(device, ds); + radv_meta_restore_compute(&saved_state, cmd_buffer, 0); } @@ -489,11 +511,10 @@ void radv_CmdUpdateBuffer( VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, - const void* pData) + const uint32_t* pData) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, 
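
Both fill_buffer_shader() and copy_buffer_shader() size their dispatch as round_up_u64(size, 1024) workgroups. Each workgroup runs 64 invocations (the local size set in the shaders above), which suggests each workgroup covers a 1024-byte slice of the buffer, i.e. 16 bytes per invocation. A sketch, assuming round_up_u64() is the usual ceiling division:

    /* Workgroup count used by the fill/copy paths above; assumes
     * round_up_u64() is ceiling division, so each 1024-byte slice gets one
     * 64-invocation workgroup (16 bytes per invocation, by inference). */
    static uint64_t buffer_meta_block_count(uint64_t size_bytes)
    {
            return (size_bytes + 1024 - 1) / 1024;
    }
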
commandBuffer); RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer); - bool mec = radv_cmd_buffer_uses_mec(cmd_buffer); uint64_t words = dataSize / 4; uint64_t va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo); va += dstOffset + dst_buffer->offset; @@ -501,26 +522,18 @@ void radv_CmdUpdateBuffer( assert(!(dataSize & 3)); assert(!(va & 3)); - if (!dataSize) - return; - if (dataSize < 4096) { - si_emit_cache_flush(cmd_buffer); - cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8); radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4); radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0)); - radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ? - V_370_MEM_ASYNC : V_370_MEMORY_SYNC) | + radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(cmd_buffer->cs, va); radeon_emit(cmd_buffer->cs, va >> 32); radeon_emit_array(cmd_buffer->cs, pData, words); - - radv_cmd_buffer_trace_emit(cmd_buffer); } else { uint32_t buf_offset; radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset); diff --git a/lib/mesa/src/amd/vulkan/radv_meta_bufimage.c b/lib/mesa/src/amd/vulkan/radv_meta_bufimage.c index 09a29d2d0..287ab3f25 100644 --- a/lib/mesa/src/amd/vulkan/radv_meta_bufimage.c +++ b/lib/mesa/src/amd/vulkan/radv_meta_bufimage.c @@ -1,34 +1,6 @@ -/* - * Copyright © 2016 Red Hat. - * Copyright © 2016 Bas Nieuwenhuizen - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ #include "radv_meta.h" #include "nir/nir_builder.h" -/* - * GFX queue: Compute shader implementation of image->buffer copy - * Compute queue: implementation also of buffer->image, image->image, and image clear. 
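
For updates below 4096 bytes, radv_CmdUpdateBuffer() above writes the payload straight into the command stream as a PKT3_WRITE_DATA packet; larger updates are staged through radv_cmd_buffer_upload_data() instead. The command-stream footprint of the inline path is the payload in dwords plus four dwords of packet overhead, matching the radeon_check_space() call above:

    /* Dword cost of the inline WRITE_DATA path shown above. */
    static uint64_t write_data_cs_dwords(VkDeviceSize data_size)
    {
            uint64_t words = data_size / 4;  /* payload; dataSize is 4-aligned */
            return words + 4;                /* header + DST_SEL + va lo/hi */
    }
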
- */ - static nir_shader * build_nir_itob_compute_shader(struct radv_device *dev) { @@ -42,10 +14,10 @@ build_nir_itob_compute_shader(struct radv_device *dev) false, GLSL_TYPE_FLOAT); nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "meta_itob_cs"); - b.shader->info->cs.local_size[0] = 16; - b.shader->info->cs.local_size[1] = 16; - b.shader->info->cs.local_size[2] = 1; + b.shader->info.name = ralloc_strdup(b.shader, "meta_itob_cs"); + b.shader->info.cs.local_size[0] = 16; + b.shader->info.cs.local_size[1] = 16; + b.shader->info.cs.local_size[2] = 1; nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); input_img->data.descriptor_set = 0; @@ -59,9 +31,9 @@ build_nir_itob_compute_shader(struct radv_device *dev) nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); nir_ssa_def *block_size = nir_imm_ivec4(&b, - b.shader->info->cs.local_size[0], - b.shader->info->cs.local_size[1], - b.shader->info->cs.local_size[2], 0); + b.shader->info.cs.local_size[0], + b.shader->info.cs.local_size[1], + b.shader->info.cs.local_size[2], 0); nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); @@ -85,7 +57,7 @@ build_nir_itob_compute_shader(struct radv_device *dev) tex->sampler_dim = GLSL_SAMPLER_DIM_2D; tex->op = nir_texop_txf; tex->src[0].src_type = nir_tex_src_coord; - tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, 0x3)); + tex->src[0].src = nir_src_for_ssa(img_coord); tex->src[1].src_type = nir_tex_src_lod; tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); tex->dest_type = nir_type_float; @@ -133,7 +105,6 @@ radv_device_init_meta_itob_state(struct radv_device *device) */ VkDescriptorSetLayoutCreateInfo ds_create_info = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, .bindingCount = 2, .pBindings = (VkDescriptorSetLayoutBinding[]) { { @@ -227,546 +198,10 @@ radv_device_finish_meta_itob_state(struct radv_device *device) } } -static nir_shader * -build_nir_btoi_compute_shader(struct radv_device *dev) -{ - nir_builder b; - const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, - false, - false, - GLSL_TYPE_FLOAT); - const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, - false, - false, - GLSL_TYPE_FLOAT); - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "meta_btoi_cs"); - b.shader->info->cs.local_size[0] = 16; - b.shader->info->cs.local_size[1] = 16; - b.shader->info->cs.local_size[2] = 1; - nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, - buf_type, "s_tex"); - input_img->data.descriptor_set = 0; - input_img->data.binding = 0; - - nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, - img_type, "out_img"); - output_img->data.descriptor_set = 0; - output_img->data.binding = 1; - - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); - nir_ssa_def *block_size = nir_imm_ivec4(&b, - b.shader->info->cs.local_size[0], - b.shader->info->cs.local_size[1], - b.shader->info->cs.local_size[2], 0); - - nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), 
invoc_id); - - nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); - offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); - offset->num_components = 2; - nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset"); - nir_builder_instr_insert(&b, &offset->instr); - - nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); - stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8)); - stride->num_components = 1; - nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride"); - nir_builder_instr_insert(&b, &stride->instr); - - nir_ssa_def *pos_x = nir_channel(&b, global_id, 0); - nir_ssa_def *pos_y = nir_channel(&b, global_id, 1); - - nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa); - tmp = nir_iadd(&b, tmp, pos_x); - - nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp); - - nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa); - - nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2); - tex->sampler_dim = GLSL_SAMPLER_DIM_BUF; - tex->op = nir_texop_txf; - tex->src[0].src_type = nir_tex_src_coord; - tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1)); - tex->src[1].src_type = nir_tex_src_lod; - tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); - tex->dest_type = nir_type_float; - tex->is_array = false; - tex->coord_components = 1; - tex->texture = nir_deref_var_create(tex, input_img); - tex->sampler = NULL; - - nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); - nir_builder_instr_insert(&b, &tex->instr); - - nir_ssa_def *outval = &tex->dest.ssa; - nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store); - store->src[0] = nir_src_for_ssa(img_coord); - store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32)); - store->src[2] = nir_src_for_ssa(outval); - store->variables[0] = nir_deref_var_create(store, output_img); - - nir_builder_instr_insert(&b, &store->instr); - return b.shader; -} - -/* Buffer to image - don't write use image accessors */ -static VkResult -radv_device_init_meta_btoi_state(struct radv_device *device) -{ - VkResult result; - struct radv_shader_module cs = { .nir = NULL }; - - zero(device->meta_state.btoi); - - cs.nir = build_nir_btoi_compute_shader(device); - - /* - * two descriptors one for the image being sampled - * one for the buffer being written. 
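
The removed btoi shader above fetches its source texel from a buffer view using a linear index built from two push constants: a 2D offset and a row stride. In plain C the addressing it constructs in NIR (imul then iadd) is simply:

    /* Linear source index computed by the removed btoi shader: the buffer
     * texel for destination pixel (x, y), given the row stride passed in
     * the push constants. */
    static inline uint32_t btoi_buffer_index(uint32_t x, uint32_t y,
                                             uint32_t stride)
    {
            return y * stride + x;
    }
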
- */ - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 2, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL - }, - } - }; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), - &ds_create_info, - &device->meta_state.alloc, - &device->meta_state.btoi.img_ds_layout); - if (result != VK_SUCCESS) - goto fail; - - - VkPipelineLayoutCreateInfo pl_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.btoi.img_ds_layout, - .pushConstantRangeCount = 1, - .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 12}, - }; - - result = radv_CreatePipelineLayout(radv_device_to_handle(device), - &pl_create_info, - &device->meta_state.alloc, - &device->meta_state.btoi.img_p_layout); - if (result != VK_SUCCESS) - goto fail; - - /* compute shader */ - - VkPipelineShaderStageCreateInfo pipeline_shader_stage = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = radv_shader_module_to_handle(&cs), - .pName = "main", - .pSpecializationInfo = NULL, - }; - - VkComputePipelineCreateInfo vk_pipeline_info = { - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .stage = pipeline_shader_stage, - .flags = 0, - .layout = device->meta_state.btoi.img_p_layout, - }; - - result = radv_CreateComputePipelines(radv_device_to_handle(device), - radv_pipeline_cache_to_handle(&device->meta_state.cache), - 1, &vk_pipeline_info, NULL, - &device->meta_state.btoi.pipeline); - if (result != VK_SUCCESS) - goto fail; - - ralloc_free(cs.nir); - return VK_SUCCESS; -fail: - ralloc_free(cs.nir); - return result; -} - -static void -radv_device_finish_meta_btoi_state(struct radv_device *device) -{ - if (device->meta_state.btoi.img_p_layout) { - radv_DestroyPipelineLayout(radv_device_to_handle(device), - device->meta_state.btoi.img_p_layout, - &device->meta_state.alloc); - } - if (device->meta_state.btoi.img_ds_layout) { - radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), - device->meta_state.btoi.img_ds_layout, - &device->meta_state.alloc); - } - if (device->meta_state.btoi.pipeline) { - radv_DestroyPipeline(radv_device_to_handle(device), - device->meta_state.btoi.pipeline, - &device->meta_state.alloc); - } -} - -static nir_shader * -build_nir_itoi_compute_shader(struct radv_device *dev) -{ - nir_builder b; - const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, - false, - false, - GLSL_TYPE_FLOAT); - const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, - false, - false, - GLSL_TYPE_FLOAT); - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "meta_itoi_cs"); - b.shader->info->cs.local_size[0] = 16; - b.shader->info->cs.local_size[1] = 16; - b.shader->info->cs.local_size[2] = 1; - nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, - buf_type, "s_tex"); - input_img->data.descriptor_set = 0; - 
input_img->data.binding = 0; - - nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, - img_type, "out_img"); - output_img->data.descriptor_set = 0; - output_img->data.binding = 1; - - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); - nir_ssa_def *block_size = nir_imm_ivec4(&b, - b.shader->info->cs.local_size[0], - b.shader->info->cs.local_size[1], - b.shader->info->cs.local_size[2], 0); - - nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); - - nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); - src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); - src_offset->num_components = 2; - nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset"); - nir_builder_instr_insert(&b, &src_offset->instr); - - nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); - dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8)); - dst_offset->num_components = 2; - nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset"); - nir_builder_instr_insert(&b, &dst_offset->instr); - - nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa); - - nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa); - - nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2); - tex->sampler_dim = GLSL_SAMPLER_DIM_2D; - tex->op = nir_texop_txf; - tex->src[0].src_type = nir_tex_src_coord; - tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 3)); - tex->src[1].src_type = nir_tex_src_lod; - tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); - tex->dest_type = nir_type_float; - tex->is_array = false; - tex->coord_components = 2; - tex->texture = nir_deref_var_create(tex, input_img); - tex->sampler = NULL; - - nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); - nir_builder_instr_insert(&b, &tex->instr); - - nir_ssa_def *outval = &tex->dest.ssa; - nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store); - store->src[0] = nir_src_for_ssa(dst_coord); - store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32)); - store->src[2] = nir_src_for_ssa(outval); - store->variables[0] = nir_deref_var_create(store, output_img); - - nir_builder_instr_insert(&b, &store->instr); - return b.shader; -} - -/* image to image - don't write use image accessors */ -static VkResult -radv_device_init_meta_itoi_state(struct radv_device *device) -{ - VkResult result; - struct radv_shader_module cs = { .nir = NULL }; - - zero(device->meta_state.itoi); - - cs.nir = build_nir_itoi_compute_shader(device); - - /* - * two descriptors one for the image being sampled - * one for the buffer being written. 
- */ - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 2, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL - }, - } - }; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), - &ds_create_info, - &device->meta_state.alloc, - &device->meta_state.itoi.img_ds_layout); - if (result != VK_SUCCESS) - goto fail; - - - VkPipelineLayoutCreateInfo pl_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.itoi.img_ds_layout, - .pushConstantRangeCount = 1, - .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, - }; - - result = radv_CreatePipelineLayout(radv_device_to_handle(device), - &pl_create_info, - &device->meta_state.alloc, - &device->meta_state.itoi.img_p_layout); - if (result != VK_SUCCESS) - goto fail; - - /* compute shader */ - - VkPipelineShaderStageCreateInfo pipeline_shader_stage = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = radv_shader_module_to_handle(&cs), - .pName = "main", - .pSpecializationInfo = NULL, - }; - - VkComputePipelineCreateInfo vk_pipeline_info = { - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .stage = pipeline_shader_stage, - .flags = 0, - .layout = device->meta_state.itoi.img_p_layout, - }; - - result = radv_CreateComputePipelines(radv_device_to_handle(device), - radv_pipeline_cache_to_handle(&device->meta_state.cache), - 1, &vk_pipeline_info, NULL, - &device->meta_state.itoi.pipeline); - if (result != VK_SUCCESS) - goto fail; - - ralloc_free(cs.nir); - return VK_SUCCESS; -fail: - ralloc_free(cs.nir); - return result; -} - -static void -radv_device_finish_meta_itoi_state(struct radv_device *device) -{ - if (device->meta_state.itoi.img_p_layout) { - radv_DestroyPipelineLayout(radv_device_to_handle(device), - device->meta_state.itoi.img_p_layout, - &device->meta_state.alloc); - } - if (device->meta_state.itoi.img_ds_layout) { - radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), - device->meta_state.itoi.img_ds_layout, - &device->meta_state.alloc); - } - if (device->meta_state.itoi.pipeline) { - radv_DestroyPipeline(radv_device_to_handle(device), - device->meta_state.itoi.pipeline, - &device->meta_state.alloc); - } -} - -static nir_shader * -build_nir_cleari_compute_shader(struct radv_device *dev) -{ - nir_builder b; - const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, - false, - false, - GLSL_TYPE_FLOAT); - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "meta_cleari_cs"); - b.shader->info->cs.local_size[0] = 16; - b.shader->info->cs.local_size[1] = 16; - b.shader->info->cs.local_size[2] = 1; - - nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, - img_type, "out_img"); - output_img->data.descriptor_set = 0; - output_img->data.binding = 0; - - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - 
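
Every meta compute shader in this file (fill, copy, itob, and the removed btoi/itoi/cleari ones) opens with the same NIR sequence: global_id = wg_id * block_size + invoc_id, where block_size holds the workgroup's local size. That is the standard compute-shader global invocation ID, per dimension:

    /* The global-ID computation each meta compute shader above builds in
     * NIR, written as plain C for three dimensions. */
    static void global_invocation_id(const uint32_t wg_id[3],
                                     const uint32_t local_id[3],
                                     const uint32_t local_size[3],
                                     uint32_t out[3])
    {
            for (int i = 0; i < 3; i++)
                    out[i] = wg_id[i] * local_size[i] + local_id[i];
    }
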
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); - nir_ssa_def *block_size = nir_imm_ivec4(&b, - b.shader->info->cs.local_size[0], - b.shader->info->cs.local_size[1], - b.shader->info->cs.local_size[2], 0); - - nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); - - nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); - clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); - clear_val->num_components = 4; - nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value"); - nir_builder_instr_insert(&b, &clear_val->instr); - - nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store); - store->src[0] = nir_src_for_ssa(global_id); - store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32)); - store->src[2] = nir_src_for_ssa(&clear_val->dest.ssa); - store->variables[0] = nir_deref_var_create(store, output_img); - - nir_builder_instr_insert(&b, &store->instr); - return b.shader; -} - -static VkResult -radv_device_init_meta_cleari_state(struct radv_device *device) -{ - VkResult result; - struct radv_shader_module cs = { .nir = NULL }; - - zero(device->meta_state.cleari); - - cs.nir = build_nir_cleari_compute_shader(device); - - /* - * two descriptors one for the image being sampled - * one for the buffer being written. - */ - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL - }, - } - }; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), - &ds_create_info, - &device->meta_state.alloc, - &device->meta_state.cleari.img_ds_layout); - if (result != VK_SUCCESS) - goto fail; - - - VkPipelineLayoutCreateInfo pl_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.cleari.img_ds_layout, - .pushConstantRangeCount = 1, - .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, - }; - - result = radv_CreatePipelineLayout(radv_device_to_handle(device), - &pl_create_info, - &device->meta_state.alloc, - &device->meta_state.cleari.img_p_layout); - if (result != VK_SUCCESS) - goto fail; - - /* compute shader */ - - VkPipelineShaderStageCreateInfo pipeline_shader_stage = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = radv_shader_module_to_handle(&cs), - .pName = "main", - .pSpecializationInfo = NULL, - }; - - VkComputePipelineCreateInfo vk_pipeline_info = { - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .stage = pipeline_shader_stage, - .flags = 0, - .layout = device->meta_state.cleari.img_p_layout, - }; - - result = radv_CreateComputePipelines(radv_device_to_handle(device), - radv_pipeline_cache_to_handle(&device->meta_state.cache), - 1, &vk_pipeline_info, NULL, - &device->meta_state.cleari.pipeline); - if (result != VK_SUCCESS) - goto fail; - - ralloc_free(cs.nir); - return VK_SUCCESS; -fail: - ralloc_free(cs.nir); - return result; -} - -static void -radv_device_finish_meta_cleari_state(struct radv_device *device) -{ - if (device->meta_state.cleari.img_p_layout) { 
- radv_DestroyPipelineLayout(radv_device_to_handle(device), - device->meta_state.cleari.img_p_layout, - &device->meta_state.alloc); - } - if (device->meta_state.cleari.img_ds_layout) { - radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), - device->meta_state.cleari.img_ds_layout, - &device->meta_state.alloc); - } - if (device->meta_state.cleari.pipeline) { - radv_DestroyPipeline(radv_device_to_handle(device), - device->meta_state.cleari.pipeline, - &device->meta_state.alloc); - } -} - void radv_device_finish_meta_bufimage_state(struct radv_device *device) { radv_device_finish_meta_itob_state(device); - radv_device_finish_meta_btoi_state(device); - radv_device_finish_meta_itoi_state(device); - radv_device_finish_meta_cleari_state(device); } VkResult @@ -777,41 +212,7 @@ radv_device_init_meta_bufimage_state(struct radv_device *device) result = radv_device_init_meta_itob_state(device); if (result != VK_SUCCESS) return result; - - result = radv_device_init_meta_btoi_state(device); - if (result != VK_SUCCESS) - goto fail_itob; - - result = radv_device_init_meta_itoi_state(device); - if (result != VK_SUCCESS) - goto fail_btoi; - - result = radv_device_init_meta_cleari_state(device); - if (result != VK_SUCCESS) - goto fail_itoi; - return VK_SUCCESS; -fail_itoi: - radv_device_finish_meta_itoi_state(device); -fail_btoi: - radv_device_finish_meta_btoi_state(device); -fail_itob: - radv_device_finish_meta_itob_state(device); - return result; -} - -void -radv_meta_begin_itoi(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_saved_compute_state *save) -{ - radv_meta_save_compute(save, cmd_buffer, 16); -} - -void -radv_meta_end_itoi(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_saved_compute_state *save) -{ - radv_meta_restore_compute(save, cmd_buffer, 16); } void @@ -828,20 +229,6 @@ radv_meta_end_bufimage(struct radv_cmd_buffer *cmd_buffer, radv_meta_restore_compute(save, cmd_buffer, 12); } -void -radv_meta_begin_cleari(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_saved_compute_state *save) -{ - radv_meta_save_compute(save, cmd_buffer, 16); -} - -void -radv_meta_end_cleari(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_saved_compute_state *save) -{ - radv_meta_restore_compute(save, cmd_buffer, 16); -} - static void create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf, @@ -886,227 +273,86 @@ create_bview(struct radv_cmd_buffer *cmd_buffer, struct itob_temps { struct radv_image_view src_iview; + struct radv_buffer_view dst_bview; + VkDescriptorSet set; }; static void -itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, - struct itob_temps *tmp) -{ - struct radv_device *device = cmd_buffer->device; - - radv_meta_push_descriptor_set(cmd_buffer, - VK_PIPELINE_BIND_POINT_COMPUTE, - device->meta_state.itob.img_p_layout, - 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(&tmp->src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - }, - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->dst_bview) }, - } - 
}); -} - -static void -itob_bind_pipeline(struct radv_cmd_buffer *cmd_buffer) +itob_bind_src_image(struct radv_cmd_buffer *cmd_buffer, + struct radv_meta_blit2d_surf *src, + struct radv_meta_blit2d_rect *rect, + struct itob_temps *tmp) { - VkPipeline pipeline = - cmd_buffer->device->meta_state.itob.pipeline; - - if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) { - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - } + create_iview(cmd_buffer, src, VK_IMAGE_USAGE_SAMPLED_BIT, &tmp->src_iview); } -void -radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_surf *src, - struct radv_meta_blit2d_buffer *dst, - unsigned num_rects, - struct radv_meta_blit2d_rect *rects) -{ - struct radv_device *device = cmd_buffer->device; - struct itob_temps temps; - - create_iview(cmd_buffer, src, VK_IMAGE_USAGE_SAMPLED_BIT, &temps.src_iview); - create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &temps.dst_bview); - itob_bind_descriptors(cmd_buffer, &temps); - - itob_bind_pipeline(cmd_buffer); - - for (unsigned r = 0; r < num_rects; ++r) { - unsigned push_constants[3] = { - rects[r].src_x, - rects[r].src_y, - dst->pitch - }; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.itob.img_p_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, 12, - push_constants); - - radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); - } -} - -struct btoi_temps { - struct radv_buffer_view src_bview; - struct radv_image_view dst_iview; -}; - static void -btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, - struct btoi_temps *tmp) +itob_bind_dst_buffer(struct radv_cmd_buffer *cmd_buffer, + struct radv_meta_blit2d_buffer *dst, + struct radv_meta_blit2d_rect *rect, + struct itob_temps *tmp) { - struct radv_device *device = cmd_buffer->device; - - radv_meta_push_descriptor_set(cmd_buffer, - VK_PIPELINE_BIND_POINT_COMPUTE, - device->meta_state.btoi.img_p_layout, - 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->src_bview) }, - }, - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(&tmp->dst_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - } - }); + create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &tmp->dst_bview); } static void -btoi_bind_pipeline(struct radv_cmd_buffer *cmd_buffer) -{ - VkPipeline pipeline = - cmd_buffer->device->meta_state.btoi.pipeline; - - if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) { - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - } -} - -void -radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_buffer *src, - struct radv_meta_blit2d_surf *dst, - unsigned num_rects, - struct radv_meta_blit2d_rect *rects) +itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, + struct itob_temps *tmp) { struct radv_device *device = cmd_buffer->device; - struct btoi_temps 
temps; - - create_bview(cmd_buffer, src->buffer, src->offset, src->format, &temps.src_bview); - create_iview(cmd_buffer, dst, VK_IMAGE_USAGE_STORAGE_BIT, &temps.dst_iview); - btoi_bind_descriptors(cmd_buffer, &temps); - - btoi_bind_pipeline(cmd_buffer); - - for (unsigned r = 0; r < num_rects; ++r) { - unsigned push_constants[3] = { - rects[r].dst_x, - rects[r].dst_y, - src->pitch - }; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.btoi.img_p_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, 12, - push_constants); - - radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); - } + VkDevice vk_device = radv_device_to_handle(cmd_buffer->device); + + radv_temp_descriptor_set_create(device, cmd_buffer, + device->meta_state.itob.img_ds_layout, + &tmp->set); + + radv_UpdateDescriptorSets(vk_device, + 2, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = tmp->set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = NULL, + .imageView = radv_image_view_to_handle(&tmp->src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = tmp->set, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->dst_bview) }, + } + }, 0, NULL); + + radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_COMPUTE, + device->meta_state.itob.img_p_layout, 0, 1, + &tmp->set, 0, NULL); } -struct itoi_temps { - struct radv_image_view src_iview; - struct radv_image_view dst_iview; -}; - static void -itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, - struct itoi_temps *tmp) +itob_unbind_src_image(struct radv_cmd_buffer *cmd_buffer, + struct itob_temps *temps) { - struct radv_device *device = cmd_buffer->device; - - radv_meta_push_descriptor_set(cmd_buffer, - VK_PIPELINE_BIND_POINT_COMPUTE, - device->meta_state.itoi.img_p_layout, - 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(&tmp->src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - }, - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(&tmp->dst_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - } - }); } static void -itoi_bind_pipeline(struct radv_cmd_buffer *cmd_buffer) +bind_pipeline(struct radv_cmd_buffer *cmd_buffer) { VkPipeline pipeline = - cmd_buffer->device->meta_state.itoi.pipeline; + cmd_buffer->device->meta_state.itob.pipeline; if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) { radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), @@ -1115,103 +361,36 @@ itoi_bind_pipeline(struct radv_cmd_buffer *cmd_buffer) } void -radv_meta_image_to_image_cs(struct 
radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_surf *src, - struct radv_meta_blit2d_surf *dst, - unsigned num_rects, - struct radv_meta_blit2d_rect *rects) +radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, + struct radv_meta_blit2d_surf *src, + struct radv_meta_blit2d_buffer *dst, + unsigned num_rects, + struct radv_meta_blit2d_rect *rects) { struct radv_device *device = cmd_buffer->device; - struct itoi_temps temps; - create_iview(cmd_buffer, src, VK_IMAGE_USAGE_SAMPLED_BIT, &temps.src_iview); - create_iview(cmd_buffer, dst, VK_IMAGE_USAGE_STORAGE_BIT, &temps.dst_iview); + for (unsigned r = 0; r < num_rects; ++r) { + struct itob_temps temps; - itoi_bind_descriptors(cmd_buffer, &temps); + itob_bind_src_image(cmd_buffer, src, &rects[r], &temps); + itob_bind_dst_buffer(cmd_buffer, dst, &rects[r], &temps); + itob_bind_descriptors(cmd_buffer, &temps); - itoi_bind_pipeline(cmd_buffer); + bind_pipeline(cmd_buffer); - for (unsigned r = 0; r < num_rects; ++r) { - unsigned push_constants[4] = { + unsigned push_constants[3] = { rects[r].src_x, rects[r].src_y, - rects[r].dst_x, - rects[r].dst_y, + dst->pitch }; radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.itoi.img_p_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, + device->meta_state.itob.img_p_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, 12, push_constants); radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); + radv_temp_descriptor_set_destroy(cmd_buffer->device, temps.set); + itob_unbind_src_image(cmd_buffer, &temps); } -} - -static void -cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, - struct radv_image_view *dst_iview) -{ - struct radv_device *device = cmd_buffer->device; - - radv_meta_push_descriptor_set(cmd_buffer, - VK_PIPELINE_BIND_POINT_COMPUTE, - device->meta_state.cleari.img_p_layout, - 0, /* set */ - 1, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(dst_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - }, - }); -} - -static void -cleari_bind_pipeline(struct radv_cmd_buffer *cmd_buffer) -{ - VkPipeline pipeline = - cmd_buffer->device->meta_state.cleari.pipeline; - - if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) { - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - } -} - -void -radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_surf *dst, - const VkClearColorValue *clear_color) -{ - struct radv_device *device = cmd_buffer->device; - struct radv_image_view dst_iview; - - create_iview(cmd_buffer, dst, VK_IMAGE_USAGE_STORAGE_BIT, &dst_iview); - cleari_bind_descriptors(cmd_buffer, &dst_iview); - - cleari_bind_pipeline(cmd_buffer); - - unsigned push_constants[4] = { - clear_color->uint32[0], - clear_color->uint32[1], - clear_color->uint32[2], - clear_color->uint32[3], - }; - - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.cleari.img_p_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, - push_constants); - radv_unaligned_dispatch(cmd_buffer, dst->image->extent.width, dst->image->extent.height, 1); } diff --git a/lib/mesa/src/amd/vulkan/radv_meta_clear.c b/lib/mesa/src/amd/vulkan/radv_meta_clear.c 
index d06cf4eeb..a3477036e 100644 --- a/lib/mesa/src/amd/vulkan/radv_meta_clear.c +++ b/lib/mesa/src/amd/vulkan/radv_meta_clear.c @@ -56,8 +56,8 @@ build_color_shaders(struct nir_shader **out_vs, nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL); - vs_b.shader->info->name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs"); - fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs"); + vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs"); + fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs"); const struct glsl_type *position_type = glsl_vec4_type(); const struct glsl_type *color_type = glsl_vec4_type(); @@ -98,18 +98,6 @@ build_color_shaders(struct nir_shader **out_vs, nir_copy_var(&vs_b, vs_out_color, vs_in_color); nir_copy_var(&fs_b, fs_out_color, fs_in_color); - const struct glsl_type *layer_type = glsl_int_type(); - nir_variable *vs_out_layer = - nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, - "v_layer"); - vs_out_layer->data.location = VARYING_SLOT_LAYER; - vs_out_layer->data.interpolation = INTERP_MODE_FLAT; - nir_ssa_def *inst_id = nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0); - nir_ssa_def *base_instance = nir_load_system_value(&vs_b, nir_intrinsic_load_base_instance, 0); - - nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance); - nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1); - *out_vs = vs_b.shader; *out_fs = fs_b.shader; } @@ -161,8 +149,8 @@ create_pipeline(struct radv_device *device, }, .pViewportState = &(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, + .viewportCount = 0, + .scissorCount = 0, }, .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, @@ -189,11 +177,9 @@ create_pipeline(struct radv_device *device, * we need only restore dynamic state was vkCmdSet. 
*/ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 8, + .dynamicStateCount = 6, .pDynamicStates = (VkDynamicState[]) { /* Everything except stencil write mask */ - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_LINE_WIDTH, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, @@ -219,50 +205,12 @@ create_pipeline(struct radv_device *device, } static VkResult -create_color_renderpass(struct radv_device *device, - VkFormat vk_format, - uint32_t samples, - VkRenderPass *pass) -{ - return radv_CreateRenderPass(radv_device_to_handle(device), - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .format = vk_format, - .samples = samples, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .colorAttachmentCount = 1, - .pColorAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .pResolveAttachments = NULL, - .pDepthStencilAttachment = &(VkAttachmentReference) { - .attachment = VK_ATTACHMENT_UNUSED, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .preserveAttachmentCount = 1, - .pPreserveAttachments = (uint32_t[]) { 0 }, - }, - .dependencyCount = 0, - }, &device->meta_state.alloc, pass); -} - -static VkResult create_color_pipeline(struct radv_device *device, + VkFormat vk_format, uint32_t samples, uint32_t frag_output, struct radv_pipeline **pipeline, - VkRenderPass pass) + VkRenderPass *pass) { struct nir_shader *vs_nir; struct nir_shader *fs_nir; @@ -322,11 +270,44 @@ create_color_pipeline(struct radv_device *device, .pAttachments = blend_attachment_state }; + result = radv_CreateRenderPass(radv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = vk_format, + .samples = samples, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveAttachmentCount = 1, + .pPreserveAttachments = (uint32_t[]) { 0 }, + }, + .dependencyCount = 0, + }, &device->meta_state.alloc, pass); + if (result != VK_SUCCESS) + return result; struct radv_graphics_pipeline_create_info extra = { .use_rectlist = true, }; - result = create_pipeline(device, radv_render_pass_from_handle(pass), + result = create_pipeline(device, radv_render_pass_from_handle(*pass), samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, &extra, &device->meta_state.alloc, pipeline); @@ -365,10 +346,12 @@ radv_device_finish_meta_clear_state(struct radv_device *device) for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) { destroy_pipeline(device, 
state->clear[i].depth_only_pipeline[j]); + destroy_render_pass(device, state->clear[i].depth_only_rp[j]); destroy_pipeline(device, state->clear[i].stencil_only_pipeline[j]); + destroy_render_pass(device, state->clear[i].stencil_only_rp[j]); destroy_pipeline(device, state->clear[i].depthstencil_pipeline[j]); + destroy_render_pass(device, state->clear[i].depthstencil_rp[j]); } - destroy_render_pass(device, state->clear[i].depthstencil_rp); } } @@ -412,22 +395,22 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const struct color_clear_vattrs vertex_data[3] = { { .position = { - -1.0, - -1.0, + clear_rect->rect.offset.x, + clear_rect->rect.offset.y, }, .color = clear_value, }, { .position = { - -1.0, - 1.0, + clear_rect->rect.offset.x, + clear_rect->rect.offset.y + clear_rect->rect.extent.height, }, .color = clear_value, }, { .position = { - 1.0, - -1.0, + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y, }, .color = clear_value, }, @@ -461,18 +444,7 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer, pipeline_h); } - radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { - .x = clear_rect->rect.offset.x, - .y = clear_rect->rect.offset.y, - .width = clear_rect->rect.extent.width, - .height = clear_rect->rect.extent.height, - .minDepth = 0.0f, - .maxDepth = 1.0f - }); - - radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect); - - radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer); + radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false); } @@ -486,8 +458,8 @@ build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL); - vs_b.shader->info->name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs"); - fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs"); + vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs"); + fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs"); const struct glsl_type *position_type = glsl_vec4_type(); nir_variable *vs_in_pos = @@ -502,64 +474,17 @@ build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); - const struct glsl_type *layer_type = glsl_int_type(); - nir_variable *vs_out_layer = - nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, - "v_layer"); - vs_out_layer->data.location = VARYING_SLOT_LAYER; - vs_out_layer->data.interpolation = INTERP_MODE_FLAT; - nir_ssa_def *inst_id = nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0); - nir_ssa_def *base_instance = nir_load_system_value(&vs_b, nir_intrinsic_load_base_instance, 0); - - nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance); - nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1); - *out_vs = vs_b.shader; *out_fs = fs_b.shader; } static VkResult -create_depthstencil_renderpass(struct radv_device *device, - uint32_t samples, - VkRenderPass *render_pass) -{ - return radv_CreateRenderPass(radv_device_to_handle(device), - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .format = VK_FORMAT_UNDEFINED, - .samples = samples, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = 
VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .colorAttachmentCount = 0, - .pColorAttachments = NULL, - .pResolveAttachments = NULL, - .pDepthStencilAttachment = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .preserveAttachmentCount = 1, - .pPreserveAttachments = (uint32_t[]) { 0 }, - }, - .dependencyCount = 0, - }, &device->meta_state.alloc, render_pass); -} - -static VkResult create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspects, uint32_t samples, int index, struct radv_pipeline **pipeline, - VkRenderPass render_pass) + VkRenderPass *render_pass) { struct nir_shader *vs_nir, *fs_nir; VkResult result; @@ -610,6 +535,36 @@ create_depthstencil_pipeline(struct radv_device *device, .pAttachments = NULL, }; + result = radv_CreateRenderPass(radv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 0, + .pColorAttachments = NULL, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveAttachmentCount = 1, + .pPreserveAttachments = (uint32_t[]) { 0 }, + }, + .dependencyCount = 0, + }, &device->meta_state.alloc, render_pass); + if (result != VK_SUCCESS) + return result; + struct radv_graphics_pipeline_create_info extra = { .use_rectlist = true, }; @@ -622,7 +577,7 @@ create_depthstencil_pipeline(struct radv_device *device, extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true; extra.db_stencil_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? 
true : false; } - result = create_pipeline(device, radv_render_pass_from_handle(render_pass), + result = create_pipeline(device, radv_render_pass_from_handle(*render_pass), samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, &extra, &device->meta_state.alloc, pipeline); return result; @@ -636,7 +591,7 @@ static bool depth_view_can_fast_clear(const struct radv_image_view *iview, clear_rect->rect.extent.width != iview->extent.width || clear_rect->rect.extent.height != iview->extent.height) return false; - if (iview->image->surface.htile_size && + if (iview->image->htile.size && iview->base_mip == 0 && iview->base_layer == 0 && radv_layout_can_expclear(iview->image, layout) && @@ -698,28 +653,25 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, VK_IMAGE_ASPECT_STENCIL_BIT)); assert(pass_att != VK_ATTACHMENT_UNUSED); - if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) - clear_value.depth = 1.0f; - const struct depthstencil_clear_vattrs vertex_data[3] = { { .position = { - -1.0, - -1.0 + clear_rect->rect.offset.x, + clear_rect->rect.offset.y, }, .depth_clear = clear_value.depth, }, { .position = { - -1.0, - 1.0, + clear_rect->rect.offset.x, + clear_rect->rect.offset.y + clear_rect->rect.extent.height, }, .depth_clear = clear_value.depth, }, { .position = { - 1.0, - -1.0, + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y, }, .depth_clear = clear_value.depth, }, @@ -757,18 +709,7 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, if (depth_view_can_fast_clear(iview, subpass->depth_stencil_attachment.layout, clear_rect)) radv_set_depth_clear_regs(cmd_buffer, iview->image, clear_value, aspects); - radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { - .x = clear_rect->rect.offset.x, - .y = clear_rect->rect.offset.y, - .width = clear_rect->rect.extent.width, - .height = clear_rect->rect.extent.height, - .minDepth = 0.0f, - .maxDepth = 1.0f - }); - - radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect); - - radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer); + radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); } @@ -799,32 +740,20 @@ radv_device_init_meta_clear_state(struct radv_device *device) VkFormat format = pipeline_formats[j]; unsigned fs_key = radv_format_meta_fs_key(format); assert(!state->clear[i].color_pipelines[fs_key]); - - res = create_color_renderpass(device, format, samples, - &state->clear[i].render_pass[fs_key]); - if (res != VK_SUCCESS) - goto fail; - - res = create_color_pipeline(device, samples, 0, &state->clear[i].color_pipelines[fs_key], - state->clear[i].render_pass[fs_key]); + res = create_color_pipeline(device, format, samples, 0, &state->clear[i].color_pipelines[fs_key], + &state->clear[i].render_pass[fs_key]); if (res != VK_SUCCESS) goto fail; } - res = create_depthstencil_renderpass(device, - samples, - &state->clear[i].depthstencil_rp); - if (res != VK_SUCCESS) - goto fail; - for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) { res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, &state->clear[i].depth_only_pipeline[j], - state->clear[i].depthstencil_rp); + &state->clear[i].depth_only_rp[j]); if (res != VK_SUCCESS) goto fail; @@ -833,7 +762,7 @@ radv_device_init_meta_clear_state(struct radv_device *device) samples, j, &state->clear[i].stencil_only_pipeline[j], - state->clear[i].depthstencil_rp); + &state->clear[i].stencil_only_rp[j]); if (res != VK_SUCCESS) goto fail; @@ -843,7 +772,7 @@ 
radv_device_init_meta_clear_state(struct radv_device *device) samples, j, &state->clear[i].depthstencil_pipeline[j], - state->clear[i].depthstencil_rp); + &state->clear[i].depthstencil_rp[j]); if (res != VK_SUCCESS) goto fail; } @@ -858,9 +787,7 @@ fail: static bool emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, - const VkClearRect *clear_rect, - enum radv_cmd_flush_bits *pre_flush, - enum radv_cmd_flush_bits *post_flush) + const VkClearRect *clear_rect) { const struct radv_subpass *subpass = cmd_buffer->state.subpass; const uint32_t subpass_att = clear_att->colorAttachment; @@ -875,10 +802,10 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer, if (!iview->image->cmask.size && !iview->image->surface.dcc_size) return false; - if (cmd_buffer->device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS) + if (!cmd_buffer->device->allow_fast_clears) return false; - if (!radv_layout_can_fast_clear(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index))) + if (!radv_layout_has_cmask(iview->image, image_layout)) goto fail; if (vk_format_get_blocksizebits(iview->image->vk_format) > 64) goto fail; @@ -918,13 +845,9 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer, if (ret == false) goto fail; - if (pre_flush) { - cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB | - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) & ~ *pre_flush; - *pre_flush |= cmd_buffer->state.flush_bits; - } else - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + si_emit_cache_flush(cmd_buffer); /* clear cmask buffer */ if (iview->image->surface.dcc_size) { radv_fill_buffer(cmd_buffer, iview->image->bo, @@ -935,15 +858,9 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer, iview->image->offset + iview->image->cmask.offset, iview->image->cmask.size, 0); } - - if (post_flush) - *post_flush |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; - else - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | + RADV_CMD_FLAG_INV_VMEM_L1 | + RADV_CMD_FLAG_INV_GLOBAL_L2; radv_set_color_clear_regs(cmd_buffer, iview->image, subpass_att, clear_color); @@ -958,14 +875,11 @@ fail: static void emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, - const VkClearRect *clear_rect, - enum radv_cmd_flush_bits *pre_flush, - enum radv_cmd_flush_bits *post_flush) + const VkClearRect *clear_rect) { if (clear_att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - if (!emit_fast_color_clear(cmd_buffer, clear_att, clear_rect, - pre_flush, post_flush)) + if (!emit_fast_color_clear(cmd_buffer, clear_att, clear_rect)) emit_color_clear(cmd_buffer, clear_att, clear_rect); } else { assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | @@ -1008,18 +922,19 @@ radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer) { struct radv_cmd_state *cmd_state = &cmd_buffer->state; struct radv_meta_saved_state saved_state; - enum radv_cmd_flush_bits pre_flush = 0; - enum radv_cmd_flush_bits post_flush = 0; if (!subpass_needs_clear(cmd_buffer)) return; radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer); + if 
(cmd_state->framebuffer->layers > 1) + radv_finishme("clearing multi-layer framebuffer"); + VkClearRect clear_rect = { .rect = cmd_state->render_area, .baseArrayLayer = 0, - .layerCount = cmd_state->framebuffer->layers, + .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ }; for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { @@ -1037,7 +952,7 @@ radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer) .clearValue = cmd_state->attachments[a].clear_value, }; - emit_clear(cmd_buffer, &clear_att, &clear_rect, &pre_flush, &post_flush); + emit_clear(cmd_buffer, &clear_att, &clear_rect); cmd_state->attachments[a].pending_clear_aspects = 0; } @@ -1052,151 +967,23 @@ radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer) .clearValue = cmd_state->attachments[ds].clear_value, }; - emit_clear(cmd_buffer, &clear_att, &clear_rect, - &pre_flush, &post_flush); + emit_clear(cmd_buffer, &clear_att, &clear_rect); cmd_state->attachments[ds].pending_clear_aspects = 0; } } radv_meta_restore(&saved_state, cmd_buffer); - cmd_buffer->state.flush_bits |= post_flush; } static void -radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - VkImageLayout image_layout, - const VkImageSubresourceRange *range, - VkFormat format, int level, int layer, - const VkClearValue *clear_val) -{ - VkDevice device_h = radv_device_to_handle(cmd_buffer->device); - struct radv_image_view iview; - radv_image_view_init(&iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = radv_image_to_handle(image), - .viewType = radv_meta_get_view_type(image), - .format = format, - .subresourceRange = { - .aspectMask = range->aspectMask, - .baseMipLevel = range->baseMipLevel + level, - .levelCount = 1, - .baseArrayLayer = range->baseArrayLayer + layer, - .layerCount = 1 - }, - }, - cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - - VkFramebuffer fb; - radv_CreateFramebuffer(device_h, - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - radv_image_view_to_handle(&iview), - }, - .width = iview.extent.width, - .height = iview.extent.height, - .layers = 1 - }, - &cmd_buffer->pool->alloc, - &fb); - - VkAttachmentDescription att_desc = { - .format = iview.vk_format, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = image_layout, - .finalLayout = image_layout, - }; - - VkSubpassDescription subpass_desc = { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .colorAttachmentCount = 0, - .pColorAttachments = NULL, - .pResolveAttachments = NULL, - .pDepthStencilAttachment = NULL, - .preserveAttachmentCount = 0, - .pPreserveAttachments = NULL, - }; - - const VkAttachmentReference att_ref = { - .attachment = 0, - .layout = image_layout, - }; - - if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - subpass_desc.colorAttachmentCount = 1; - subpass_desc.pColorAttachments = &att_ref; - } else { - subpass_desc.pDepthStencilAttachment = &att_ref; - } - - VkRenderPass pass; - radv_CreateRenderPass(device_h, - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &att_desc, - .subpassCount = 1, - .pSubpasses = &subpass_desc, - }, - &cmd_buffer->pool->alloc, - &pass); - - 
radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer), - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderArea = { - .offset = { 0, 0, }, - .extent = { - .width = iview.extent.width, - .height = iview.extent.height, - }, - }, - .renderPass = pass, - .framebuffer = fb, - .clearValueCount = 0, - .pClearValues = NULL, - }, - VK_SUBPASS_CONTENTS_INLINE); - - VkClearAttachment clear_att = { - .aspectMask = range->aspectMask, - .colorAttachment = 0, - .clearValue = *clear_val, - }; - - VkClearRect clear_rect = { - .rect = { - .offset = { 0, 0 }, - .extent = { iview.extent.width, iview.extent.height }, - }, - .baseArrayLayer = range->baseArrayLayer, - .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ - }; - - emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL); - - radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer)); - radv_DestroyRenderPass(device_h, pass, - &cmd_buffer->pool->alloc); - radv_DestroyFramebuffer(device_h, fb, - &cmd_buffer->pool->alloc); -} -static void radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout image_layout, const VkClearValue *clear_value, uint32_t range_count, - const VkImageSubresourceRange *ranges, - bool cs) + const VkImageSubresourceRange *ranges) { + VkDevice device_h = radv_device_to_handle(cmd_buffer->device); VkFormat format = image->vk_format; VkClearValue internal_clear_value = *clear_value; @@ -1207,14 +994,6 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, internal_clear_value.color.uint32[0] = value; } - if (format == VK_FORMAT_R4G4_UNORM_PACK8) { - uint8_t r, g; - format = VK_FORMAT_R8_UINT; - r = float_to_ubyte(clear_value->color.float32[0]) >> 4; - g = float_to_ubyte(clear_value->color.float32[1]) >> 4; - internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf); - } - for (uint32_t r = 0; r < range_count; r++) { const VkImageSubresourceRange *range = &ranges[r]; for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) { @@ -1222,30 +1001,127 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, radv_minify(image->extent.depth, range->baseMipLevel + l) : radv_get_layerCount(image, range); for (uint32_t s = 0; s < layer_count; ++s) { - - if (cs) { - struct radv_meta_blit2d_surf surf; - surf.format = format; - surf.image = image; - surf.level = range->baseMipLevel + l; - surf.layer = range->baseArrayLayer + s; - surf.aspect_mask = range->aspectMask; - radv_meta_clear_image_cs(cmd_buffer, &surf, - &internal_clear_value.color); + struct radv_image_view iview; + radv_image_view_init(&iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = radv_image_to_handle(image), + .viewType = radv_meta_get_view_type(image), + .format = format, + .subresourceRange = { + .aspectMask = range->aspectMask, + .baseMipLevel = range->baseMipLevel + l, + .levelCount = 1, + .baseArrayLayer = range->baseArrayLayer + s, + .layerCount = 1 + }, + }, + cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + + VkFramebuffer fb; + radv_CreateFramebuffer(device_h, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + radv_image_view_to_handle(&iview), + }, + .width = iview.extent.width, + .height = iview.extent.height, + .layers = 1 + }, + &cmd_buffer->pool->alloc, + &fb); + + VkAttachmentDescription att_desc = { + .format = iview.vk_format, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + 
.storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = image_layout, + .finalLayout = image_layout, + }; + + VkSubpassDescription subpass_desc = { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 0, + .pColorAttachments = NULL, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = NULL, + .preserveAttachmentCount = 0, + .pPreserveAttachments = NULL, + }; + + const VkAttachmentReference att_ref = { + .attachment = 0, + .layout = image_layout, + }; + + if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + subpass_desc.colorAttachmentCount = 1; + subpass_desc.pColorAttachments = &att_ref; } else { - radv_clear_image_layer(cmd_buffer, image, image_layout, - range, format, l, s, &internal_clear_value); + subpass_desc.pDepthStencilAttachment = &att_ref; } + + VkRenderPass pass; + radv_CreateRenderPass(device_h, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &att_desc, + .subpassCount = 1, + .pSubpasses = &subpass_desc, + }, + &cmd_buffer->pool->alloc, + &pass); + + radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderArea = { + .offset = { 0, 0, }, + .extent = { + .width = iview.extent.width, + .height = iview.extent.height, + }, + }, + .renderPass = pass, + .framebuffer = fb, + .clearValueCount = 0, + .pClearValues = NULL, + }, + VK_SUBPASS_CONTENTS_INLINE); + + VkClearAttachment clear_att = { + .aspectMask = range->aspectMask, + .colorAttachment = 0, + .clearValue = internal_clear_value, + }; + + VkClearRect clear_rect = { + .rect = { + .offset = { 0, 0 }, + .extent = { iview.extent.width, iview.extent.height }, + }, + .baseArrayLayer = range->baseArrayLayer, + .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + + radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer)); + radv_DestroyRenderPass(device_h, pass, + &cmd_buffer->pool->alloc); + radv_DestroyFramebuffer(device_h, fb, + &cmd_buffer->pool->alloc); } } } } -union meta_saved_state { - struct radv_meta_saved_state gfx; - struct radv_meta_saved_compute_state compute; -}; - void radv_CmdClearColorImage( VkCommandBuffer commandBuffer, VkImage image_h, @@ -1256,22 +1132,15 @@ void radv_CmdClearColorImage( { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_image, image, image_h); - union meta_saved_state saved_state; - bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE; + struct radv_meta_saved_state saved_state; - if (cs) - radv_meta_begin_cleari(cmd_buffer, &saved_state.compute); - else - radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer); + radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer); radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *) pColor, - rangeCount, pRanges, cs); + rangeCount, pRanges); - if (cs) - radv_meta_end_cleari(cmd_buffer, &saved_state.compute); - else - radv_meta_restore(&saved_state.gfx, cmd_buffer); + radv_meta_restore(&saved_state, cmd_buffer); } void radv_CmdClearDepthStencilImage( @@ -1290,7 +1159,7 @@ void radv_CmdClearDepthStencilImage( radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *) pDepthStencil, - rangeCount, pRanges, false); + rangeCount, pRanges); 
radv_meta_restore(&saved_state, cmd_buffer); } @@ -1304,8 +1173,6 @@ void radv_CmdClearAttachments( { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_meta_saved_state saved_state; - enum radv_cmd_flush_bits pre_flush = 0; - enum radv_cmd_flush_bits post_flush = 0; if (!cmd_buffer->state.subpass) return; @@ -1317,10 +1184,9 @@ void radv_CmdClearAttachments( */ for (uint32_t a = 0; a < attachmentCount; ++a) { for (uint32_t r = 0; r < rectCount; ++r) { - emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush); + emit_clear(cmd_buffer, &pAttachments[a], &pRects[r]); } } radv_meta_restore(&saved_state, cmd_buffer); - cmd_buffer->state.flush_bits |= post_flush; } diff --git a/lib/mesa/src/amd/vulkan/radv_meta_copy.c b/lib/mesa/src/amd/vulkan/radv_meta_copy.c index 54dadde78..4c01eb7ac 100644 --- a/lib/mesa/src/amd/vulkan/radv_meta_copy.c +++ b/lib/mesa/src/amd/vulkan/radv_meta_copy.c @@ -78,13 +78,13 @@ vk_format_for_size(int bs) } static struct radv_meta_blit2d_surf -blit_surf_for_image_level_layer(struct radv_image *image, - const VkImageSubresourceLayers *subres) +blit_surf_for_image_level_layer(struct radv_image* image, VkImageAspectFlags aspectMask, + int level, int layer) { VkFormat format = image->vk_format; - if (subres->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) + if (aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) format = vk_format_depth_only(format); - else if (subres->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) + else if (aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) format = vk_format_stencil_only(format); if (!image->surface.dcc_size) @@ -93,18 +93,13 @@ blit_surf_for_image_level_layer(struct radv_image *image, return (struct radv_meta_blit2d_surf) { .format = format, .bs = vk_format_get_blocksize(format), - .level = subres->mipLevel, - .layer = subres->baseArrayLayer, + .level = level, + .layer = layer, .image = image, - .aspect_mask = subres->aspectMask, + .aspect_mask = aspectMask, }; } -union meta_saved_state { - struct radv_meta_saved_state gfx; - struct radv_meta_saved_compute_state compute; -}; - static void meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer* buffer, @@ -112,18 +107,14 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, uint32_t regionCount, const VkBufferImageCopy* pRegions) { - bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE; - union meta_saved_state saved_state; + struct radv_meta_saved_state saved_state; /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to * VK_SAMPLE_COUNT_1_BIT." 
*/ assert(image->samples == 1); - if (cs) - radv_meta_begin_bufimage(cmd_buffer, &saved_state.compute); - else - radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer); + radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer); for (unsigned r = 0; r < regionCount; r++) { @@ -159,7 +150,9 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, /* Create blit surfaces */ struct radv_meta_blit2d_surf img_bsurf = blit_surf_for_image_level_layer(image, - &pRegions[r].imageSubresource); + pRegions[r].imageSubresource.aspectMask, + pRegions[r].imageSubresource.mipLevel, + pRegions[r].imageSubresource.baseArrayLayer); struct radv_meta_blit2d_buffer buf_bsurf = { .bs = img_bsurf.bs, @@ -169,8 +162,6 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, .pitch = buf_extent_el.width, }; - if (image->type == VK_IMAGE_TYPE_3D) - img_bsurf.layer = img_offset_el.z; /* Loop through each 3D or array slice */ unsigned num_slices_3d = img_extent_el.depth; unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; @@ -183,10 +174,7 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, /* Perform Blit */ - if (cs) - radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect); - else - radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect); + radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect); /* Once we've done the blit, all of the actual information about * the image is embedded in the command buffer so we can just @@ -202,10 +190,7 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, slice_array++; } } - if (cs) - radv_meta_end_bufimage(cmd_buffer, &saved_state.compute); - else - radv_meta_restore(&saved_state.gfx, cmd_buffer); + radv_meta_restore(&saved_state, cmd_buffer); } void radv_CmdCopyBufferToImage( @@ -268,8 +253,9 @@ meta_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, /* Create blit surfaces */ struct radv_meta_blit2d_surf img_info = blit_surf_for_image_level_layer(image, - &pRegions[r].imageSubresource); - + pRegions[r].imageSubresource.aspectMask, + pRegions[r].imageSubresource.mipLevel, + pRegions[r].imageSubresource.baseArrayLayer); struct radv_meta_blit2d_buffer buf_info = { .bs = img_info.bs, .format = img_info.format, @@ -278,8 +264,6 @@ meta_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, .pitch = buf_extent_el.width, }; - if (image->type == VK_IMAGE_TYPE_3D) - img_info.layer = img_offset_el.z; /* Loop through each 3D or array slice */ unsigned num_slices_3d = img_extent_el.depth; unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; @@ -322,15 +306,19 @@ void radv_CmdCopyImageToBuffer( regionCount, pRegions); } -static void -meta_copy_image(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *src_image, - struct radv_image *dest_image, - uint32_t regionCount, - const VkImageCopy *pRegions) +void radv_CmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions) { - bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE; - union meta_saved_state saved_state; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_image, src_image, srcImage); + RADV_FROM_HANDLE(radv_image, dest_image, destImage); + struct radv_meta_saved_state saved_state; /* From the Vulkan 1.0 spec: * @@ -338,10 +326,8 @@ meta_copy_image(struct radv_cmd_buffer 
*cmd_buffer, * images, but both images must have the same number of samples. */ assert(src_image->samples == dest_image->samples); - if (cs) - radv_meta_begin_itoi(cmd_buffer, &saved_state.compute); - else - radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer); + + radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer); for (unsigned r = 0; r < regionCount; r++) { assert(pRegions[r].srcSubresource.aspectMask == @@ -350,11 +336,14 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer, /* Create blit surfaces */ struct radv_meta_blit2d_surf b_src = blit_surf_for_image_level_layer(src_image, - &pRegions[r].srcSubresource); - + pRegions[r].srcSubresource.aspectMask, + pRegions[r].srcSubresource.mipLevel, + pRegions[r].srcSubresource.baseArrayLayer); struct radv_meta_blit2d_surf b_dst = blit_surf_for_image_level_layer(dest_image, - &pRegions[r].dstSubresource); + pRegions[r].dstSubresource.aspectMask, + pRegions[r].dstSubresource.mipLevel, + pRegions[r].dstSubresource.baseArrayLayer); /* for DCC */ b_src.format = b_dst.format; @@ -373,7 +362,7 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer, const VkOffset3D src_offset_el = meta_region_offset_el(src_image, &pRegions[r].srcOffset); const VkExtent3D img_extent_el = - meta_region_extent_el(dest_image, &pRegions[r].extent); + meta_region_extent_el(src_image, &pRegions[r].extent); /* Start creating blit rect */ struct radv_meta_blit2d_rect rect = { @@ -381,9 +370,6 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer, .height = img_extent_el.height, }; - if (dest_image->type == VK_IMAGE_TYPE_3D) - b_dst.layer = dst_offset_el.z; - /* Loop through each 3D or array slice */ unsigned num_slices_3d = img_extent_el.depth; unsigned num_slices_array = pRegions[r].dstSubresource.layerCount; @@ -398,10 +384,7 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer, rect.src_y = src_offset_el.y; /* Perform Blit */ - if (cs) - radv_meta_image_to_image_cs(cmd_buffer, &b_src, &b_dst, 1, &rect); - else - radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect); + radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect); b_src.layer++; b_dst.layer++; @@ -412,45 +395,5 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer, } } - if (cs) - radv_meta_end_itoi(cmd_buffer, &saved_state.compute); - else - radv_meta_restore(&saved_state.gfx, cmd_buffer); -} - -void radv_CmdCopyImage( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageCopy* pRegions) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_image, src_image, srcImage); - RADV_FROM_HANDLE(radv_image, dest_image, destImage); - - meta_copy_image(cmd_buffer, src_image, dest_image, - regionCount, pRegions); -} - -void radv_blit_to_prime_linear(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - struct radv_image *linear_image) -{ - struct VkImageCopy image_copy = { 0 }; - - image_copy.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - image_copy.srcSubresource.layerCount = 1; - - image_copy.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - image_copy.dstSubresource.layerCount = 1; - - image_copy.extent.width = image->extent.width; - image_copy.extent.height = image->extent.height; - image_copy.extent.depth = 1; - - meta_copy_image(cmd_buffer, image, linear_image, - 1, &image_copy); + radv_meta_restore(&saved_state, cmd_buffer); } diff --git 
a/lib/mesa/src/amd/vulkan/radv_meta_decompress.c b/lib/mesa/src/amd/vulkan/radv_meta_decompress.c index 854b88a36..0ba6bd075 100644 --- a/lib/mesa/src/amd/vulkan/radv_meta_decompress.c +++ b/lib/mesa/src/amd/vulkan/radv_meta_decompress.c @@ -46,7 +46,7 @@ build_nir_vs(void) nir_variable *v_position; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "meta_depth_decomp_vs"); + b.shader->info.name = ralloc_strdup(b.shader, "meta_depth_decomp_vs"); a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, "a_position"); @@ -68,8 +68,8 @@ build_nir_fs(void) nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info->name = ralloc_asprintf(b.shader, - "meta_depth_decomp_noop_fs"); + b.shader->info.name = ralloc_asprintf(b.shader, + "meta_depth_decomp_noop_fs"); return b.shader; } @@ -178,8 +178,8 @@ create_pipeline(struct radv_device *device, }, .pViewportState = &(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, + .viewportCount = 0, + .scissorCount = 0, }, .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, @@ -210,14 +210,7 @@ create_pipeline(struct radv_device *device, .depthBoundsTestEnable = false, .stencilTestEnable = false, }, - .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 2, - .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }, - }, + .pDynamicState = NULL, .renderPass = device->meta_state.depth_decomp.pass, .subpass = 0, }; @@ -324,20 +317,20 @@ emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer, const struct vertex_attrs vertex_data[3] = { { .position = { - -1.0, - -1.0, + dest_offset->x, + dest_offset->y, }, }, { .position = { - -1.0, - 1.0, + dest_offset->x, + dest_offset->y + depth_decomp_extent->height, }, }, { .position = { - 1.0, - -1.0, + dest_offset->x + depth_decomp_extent->width, + dest_offset->y, }, }, }; @@ -365,20 +358,6 @@ emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer, pipeline_h); } - radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { - .x = dest_offset->x, - .y = dest_offset->y, - .width = depth_decomp_extent->width, - .height = depth_decomp_extent->height, - .minDepth = 0.0f, - .maxDepth = 1.0f - }); - - radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) { - .offset = *dest_offset, - .extent = *depth_decomp_extent, - }); - radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); } @@ -397,13 +376,13 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, uint32_t height = radv_minify(image->extent.height, subresourceRange->baseMipLevel); - if (!image->surface.htile_size) + if (!image->htile.size) return; radv_meta_save_pass(&saved_pass_state, cmd_buffer); radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer); - for (uint32_t layer = 0; layer < radv_get_layerCount(image, subresourceRange); layer++) { + for (uint32_t layer = 0; layer < subresourceRange->layerCount; layer++) { struct radv_image_view iview; radv_image_view_init(&iview, cmd_buffer->device, @@ -471,7 +450,6 @@ void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageSubresourceRange *subresourceRange) { - 
assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL); radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, cmd_buffer->device->meta_state.depth_decomp.decompress_pipeline); } @@ -480,7 +458,6 @@ void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageSubresourceRange *subresourceRange) { - assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL); radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, cmd_buffer->device->meta_state.depth_decomp.resummarize_pipeline); } diff --git a/lib/mesa/src/amd/vulkan/radv_meta_fast_clear.c b/lib/mesa/src/amd/vulkan/radv_meta_fast_clear.c index 3393bcb25..15c9bbcb1 100644 --- a/lib/mesa/src/amd/vulkan/radv_meta_fast_clear.c +++ b/lib/mesa/src/amd/vulkan/radv_meta_fast_clear.c @@ -46,7 +46,7 @@ build_nir_vs(void) nir_variable *v_position; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "meta_fast_clear_vs"); + b.shader->info.name = ralloc_strdup(b.shader, "meta_fast_clear_vs"); a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, "a_position"); @@ -68,7 +68,7 @@ build_nir_fs(void) nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info->name = ralloc_asprintf(b.shader, + b.shader->info.name = ralloc_asprintf(b.shader, "meta_fast_clear_noop_fs"); return b.shader; @@ -214,8 +214,8 @@ create_pipeline(struct radv_device *device, .pViewportState = &(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, + .viewportCount = 0, + .scissorCount = 0, }, .pRasterizationState = &rs_state, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { @@ -227,14 +227,7 @@ create_pipeline(struct radv_device *device, .alphaToOneEnable = false, }, .pColorBlendState = &blend_state, - .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 2, - .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }, - }, + .pDynamicState = NULL, .renderPass = device->meta_state.fast_clear_flush.pass, .subpass = 0, }, @@ -259,8 +252,8 @@ create_pipeline(struct radv_device *device, .pViewportState = &(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, + .viewportCount = 0, + .scissorCount = 0, }, .pRasterizationState = &rs_state, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { @@ -272,14 +265,7 @@ create_pipeline(struct radv_device *device, .alphaToOneEnable = false, }, .pColorBlendState = &blend_state, - .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 2, - .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }, - }, + .pDynamicState = NULL, .renderPass = device->meta_state.fast_clear_flush.pass, .subpass = 0, }, @@ -368,24 +354,26 @@ emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer, const struct vertex_attrs vertex_data[3] = { { .position = { - -1.0, - -1.0, + 0, + 0, }, }, { .position = { - -1.0, - 1.0, + 0, + resolve_extent->height, }, }, { .position = { - 1.0, - -1.0, + resolve_extent->width, + 0, }, }, }; + cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB | + 
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META); radv_cmd_buffer_upload_data(cmd_buffer, sizeof(vertex_data), 16, vertex_data, &offset); struct radv_buffer vertex_buffer = { .device = device, @@ -414,77 +402,58 @@ emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer, pipeline_h); } - radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { - .x = 0, - .y = 0, - .width = resolve_extent->width, - .height = resolve_extent->height, - .minDepth = 0.0f, - .maxDepth = 1.0f - }); - - radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) { - .offset = (VkOffset2D) { 0, 0 }, - .extent = (VkExtent2D) { resolve_extent->width, resolve_extent->height }, - }); - radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META); + si_emit_cache_flush(cmd_buffer); } /** */ void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - const VkImageSubresourceRange *subresourceRange) + struct radv_image *image) { struct radv_meta_saved_state saved_state; struct radv_meta_saved_pass_state saved_pass_state; VkDevice device_h = radv_device_to_handle(cmd_buffer->device); VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); - uint32_t layer_count = radv_get_layerCount(image, subresourceRange); - assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL); radv_meta_save_pass(&saved_pass_state, cmd_buffer); radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer); - for (uint32_t layer = 0; layer < layer_count; ++layer) { - struct radv_image_view iview; - - radv_image_view_init(&iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + struct radv_image_view iview; + radv_image_view_init(&iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = radv_image_to_handle(image), - .viewType = radv_meta_get_view_type(image), .format = image->vk_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, .levelCount = 1, - .baseArrayLayer = subresourceRange->baseArrayLayer + layer, + .baseArrayLayer = 0, .layerCount = 1, - }, + }, }, cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - VkFramebuffer fb_h; - radv_CreateFramebuffer(device_h, - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - radv_image_view_to_handle(&iview) - }, + VkFramebuffer fb_h; + radv_CreateFramebuffer(device_h, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + radv_image_view_to_handle(&iview) + }, .width = image->extent.width, .height = image->extent.height, .layers = 1 - }, - &cmd_buffer->pool->alloc, - &fb_h); + }, + &cmd_buffer->pool->alloc, + &fb_h); - radv_CmdBeginRenderPass(cmd_buffer_h, + radv_CmdBeginRenderPass(cmd_buffer_h, &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .renderPass = cmd_buffer->device->meta_state.fast_clear_flush.pass, @@ -504,15 +473,14 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, }, VK_SUBPASS_CONTENTS_INLINE); - emit_fast_clear_flush(cmd_buffer, - &(VkExtent2D) { image->extent.width, image->extent.height }, - image->fmask.size > 0); - radv_CmdEndRenderPass(cmd_buffer_h); + emit_fast_clear_flush(cmd_buffer, + &(VkExtent2D) { 
image->extent.width, image->extent.height }, + image->fmask.size > 0); + radv_CmdEndRenderPass(cmd_buffer_h); - radv_DestroyFramebuffer(device_h, fb_h, - &cmd_buffer->pool->alloc); + radv_DestroyFramebuffer(device_h, fb_h, + &cmd_buffer->pool->alloc); - } radv_meta_restore(&saved_state, cmd_buffer); radv_meta_restore_pass(&saved_pass_state, cmd_buffer); } diff --git a/lib/mesa/src/amd/vulkan/radv_meta_resolve.c b/lib/mesa/src/amd/vulkan/radv_meta_resolve.c index 52f7246f6..da813eb56 100644 --- a/lib/mesa/src/amd/vulkan/radv_meta_resolve.c +++ b/lib/mesa/src/amd/vulkan/radv_meta_resolve.c @@ -33,6 +33,7 @@ */ struct vertex_attrs { float position[2]; /**< 3DPRIM_RECTLIST */ + float tex_position[2]; }; /* passthrough vertex shader */ @@ -44,9 +45,11 @@ build_nir_vs(void) nir_builder b; nir_variable *a_position; nir_variable *v_position; + nir_variable *a_tex_position; + nir_variable *v_tex_position; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "meta_resolve_vs"); + b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs"); a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, "a_position"); @@ -56,7 +59,16 @@ build_nir_vs(void) "gl_Position"); v_position->data.location = VARYING_SLOT_POS; + a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "a_tex_position"); + a_tex_position->data.location = VERT_ATTRIB_GENERIC1; + + v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "v_tex_position"); + v_tex_position->data.location = VARYING_SLOT_VAR0; + nir_copy_var(&b, v_position, a_position); + nir_copy_var(&b, v_tex_position, a_tex_position); return b.shader; } @@ -67,16 +79,22 @@ build_nir_fs(void) { const struct glsl_type *vec4 = glsl_vec4_type(); nir_builder b; + nir_variable *v_tex_position; /* vec4, varying texture coordinate */ nir_variable *f_color; /* vec4, fragment output color */ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info->name = ralloc_asprintf(b.shader, - "meta_resolve_fs"); + b.shader->info.name = ralloc_asprintf(b.shader, + "meta_resolve_fs"); + + v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "v_tex_position"); + v_tex_position->data.location = VARYING_SLOT_VAR0; f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color"); f_color->data.location = FRAG_RESULT_DATA0; - nir_store_var(&b, f_color, nir_imm_vec4(&b, 0.0, 0.0, 0.0, 1.0), 0xf); + + nir_copy_var(&b, f_color, v_tex_position); return b.shader; } @@ -95,11 +113,9 @@ create_pass(struct radv_device *device) attachments[i].samples = 1; attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[i].initialLayout = VK_IMAGE_LAYOUT_GENERAL; + attachments[i].finalLayout = VK_IMAGE_LAYOUT_GENERAL; } - attachments[0].initialLayout = VK_IMAGE_LAYOUT_GENERAL; - attachments[0].finalLayout = VK_IMAGE_LAYOUT_GENERAL; - attachments[1].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - attachments[1].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; result = radv_CreateRenderPass(device_h, &(VkRenderPassCreateInfo) { @@ -118,7 +134,7 @@ create_pass(struct radv_device *device) }, { .attachment = 1, - .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + .layout = VK_IMAGE_LAYOUT_GENERAL, }, }, .pResolveAttachments = NULL, @@ -182,7 +198,7 @@ create_pipeline(struct radv_device *device, .inputRate = VK_VERTEX_INPUT_RATE_VERTEX }, }, - 
.vertexAttributeDescriptionCount = 1, + .vertexAttributeDescriptionCount = 2, .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { { /* Position */ @@ -191,6 +207,13 @@ create_pipeline(struct radv_device *device, .format = VK_FORMAT_R32G32_SFLOAT, .offset = offsetof(struct vertex_attrs, position), }, + { + /* Texture Coordinate */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct vertex_attrs, tex_position), + }, }, }, .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { @@ -200,8 +223,8 @@ create_pipeline(struct radv_device *device, }, .pViewportState = &(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, + .viewportCount = 0, + .scissorCount = 0, }, .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, @@ -236,14 +259,7 @@ create_pipeline(struct radv_device *device, } }, }, - .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 2, - .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }, - }, + .pDynamicState = NULL, .renderPass = device->meta_state.resolve.pass, .subpass = 0, }, @@ -317,6 +333,7 @@ cleanup: static void emit_resolve(struct radv_cmd_buffer *cmd_buffer, + const VkOffset2D *src_offset, const VkOffset2D *dest_offset, const VkExtent2D *resolve_extent) { @@ -326,20 +343,32 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer, const struct vertex_attrs vertex_data[3] = { { .position = { - -1.0, - -1.0, + dest_offset->x, + dest_offset->y, + }, + .tex_position = { + src_offset->x, + src_offset->y, }, }, { .position = { - -1.0, - 1.0, + dest_offset->x, + dest_offset->y + resolve_extent->height, + }, + .tex_position = { + src_offset->x, + src_offset->y + resolve_extent->height, }, }, { .position = { - 1.0, - -1.0, + dest_offset->x + resolve_extent->width, + dest_offset->y, + }, + .tex_position = { + src_offset->x + resolve_extent->width, + src_offset->y, }, }, }; @@ -369,22 +398,9 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer, pipeline_h); } - radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { - .x = dest_offset->x, - .y = dest_offset->y, - .width = resolve_extent->width, - .height = resolve_extent->height, - .minDepth = 0.0f, - .maxDepth = 1.0f - }); - - radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) { - .offset = *dest_offset, - .extent = *resolve_extent, - }); - radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; + si_emit_cache_flush(cmd_buffer); } void radv_CmdResolveImage( @@ -424,6 +440,7 @@ void radv_CmdResolveImage( if (use_compute_resolve) { + radv_fast_clear_flush_image_inplace(cmd_buffer, src_image); radv_meta_resolve_compute_image(cmd_buffer, src_image, src_image_layout, @@ -449,9 +466,6 @@ void radv_CmdResolveImage( if (src_image->array_size > 1) radv_finishme("vkCmdResolveImage: multisample array images"); - if (dest_image->surface.dcc_size) { - radv_initialize_dcc(cmd_buffer, dest_image, 0xffffffff); - } for (uint32_t r = 0; r < region_count; ++r) { const VkImageResolve *region = ®ions[r]; @@ -491,6 +505,8 @@ void radv_CmdResolveImage( */ const struct VkExtent3D extent = radv_sanitize_image_extent(src_image->type, region->extent); + const struct VkOffset3D srcOffset = + 
radv_sanitize_image_offset(src_image->type, region->srcOffset); const struct VkOffset3D dstOffset = radv_sanitize_image_offset(dest_image->type, region->dstOffset); @@ -572,6 +588,10 @@ void radv_CmdResolveImage( emit_resolve(cmd_buffer, &(VkOffset2D) { + .x = srcOffset.x, + .y = srcOffset.y, + }, + &(VkOffset2D) { .x = dstOffset.x, .y = dstOffset.y, }, @@ -643,6 +663,7 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer) */ emit_resolve(cmd_buffer, &(VkOffset2D) { 0, 0 }, + &(VkOffset2D) { 0, 0 }, &(VkExtent2D) { fb->width, fb->height }); } diff --git a/lib/mesa/src/amd/vulkan/radv_meta_resolve_cs.c b/lib/mesa/src/amd/vulkan/radv_meta_resolve_cs.c index ffa07cac5..c6525b6f3 100644 --- a/lib/mesa/src/amd/vulkan/radv_meta_resolve_cs.c +++ b/lib/mesa/src/amd/vulkan/radv_meta_resolve_cs.c @@ -47,10 +47,10 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int sampl GLSL_TYPE_FLOAT); snprintf(name, 64, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : "float"); nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); - b.shader->info->name = ralloc_strdup(b.shader, name); - b.shader->info->cs.local_size[0] = 16; - b.shader->info->cs.local_size[1] = 16; - b.shader->info->cs.local_size[2] = 1; + b.shader->info.name = ralloc_strdup(b.shader, name); + b.shader->info.cs.local_size[0] = 16; + b.shader->info.cs.local_size[1] = 16; + b.shader->info.cs.local_size[2] = 1; nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); @@ -64,9 +64,9 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int sampl nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); nir_ssa_def *block_size = nir_imm_ivec4(&b, - b.shader->info->cs.local_size[0], - b.shader->info->cs.local_size[1], - b.shader->info->cs.local_size[2], 0); + b.shader->info.cs.local_size[0], + b.shader->info.cs.local_size[1], + b.shader->info.cs.local_size[2], 0); nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); @@ -82,7 +82,7 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int sampl nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset"); nir_builder_instr_insert(&b, &dst_offset->instr); - nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3); + nir_ssa_def *img_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa); /* do a txf_ms on each sample */ nir_ssa_def *tmp; @@ -179,7 +179,6 @@ create_layout(struct radv_device *device) */ VkDescriptorSetLayoutCreateInfo ds_create_info = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, .bindingCount = 2, .pBindings = (VkDescriptorSetLayoutBinding[]) { { @@ -327,21 +326,6 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_saved_compute_state saved_state; const uint32_t samples = src_image->samples; const uint32_t samples_log2 = ffs(samples) - 1; - - for (uint32_t r = 0; r < region_count; ++r) { - const VkImageResolve *region = ®ions[r]; - const uint32_t src_base_layer = - radv_meta_get_iview_layer(src_image, ®ion->srcSubresource, - ®ion->srcOffset); - VkImageSubresourceRange range; - range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - range.baseMipLevel = region->srcSubresource.mipLevel; - range.levelCount = 1; - 
range.baseArrayLayer = src_base_layer; - range.layerCount = region->srcSubresource.layerCount; - radv_fast_clear_flush_image_inplace(cmd_buffer, src_image, &range); - } - radv_meta_save_compute(&saved_state, cmd_buffer, 16); for (uint32_t r = 0; r < region_count; ++r) { @@ -370,6 +354,7 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, ++layer) { struct radv_image_view src_iview; + VkDescriptorSet set; radv_image_view_init(&src_iview, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, @@ -404,41 +389,49 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, cmd_buffer, VK_IMAGE_USAGE_STORAGE_BIT); - radv_meta_push_descriptor_set(cmd_buffer, - VK_PIPELINE_BIND_POINT_COMPUTE, - device->meta_state.resolve_compute.p_layout, - 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(&src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - }, - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(&dest_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - } - }); + radv_temp_descriptor_set_create(device, cmd_buffer, + device->meta_state.resolve_compute.ds_layout, + &set); + + radv_UpdateDescriptorSets(radv_device_to_handle(device), + 2, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = NULL, + .imageView = radv_image_view_to_handle(&src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = set, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = NULL, + .imageView = radv_image_view_to_handle(&dest_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_COMPUTE, + device->meta_state.resolve_compute.p_layout, 0, 1, + &set, 0, NULL); VkPipeline pipeline; if (vk_format_is_int(src_image->vk_format)) @@ -461,6 +454,7 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants); radv_unaligned_dispatch(cmd_buffer, extent.width, extent.height, 1); + radv_temp_descriptor_set_destroy(cmd_buffer->device, set); } } radv_meta_restore_compute(&saved_state, cmd_buffer, 16); diff --git a/lib/mesa/src/amd/vulkan/radv_pipeline.c b/lib/mesa/src/amd/vulkan/radv_pipeline.c index e0c67ce5e..7c10b78e7 100644 --- a/lib/mesa/src/amd/vulkan/radv_pipeline.c +++ b/lib/mesa/src/amd/vulkan/radv_pipeline.c @@ -41,7 +41,6 @@ #include "ac_nir_to_llvm.h" #include "vk_format.h" #include "util/debug.h" - void radv_shader_variant_destroy(struct radv_device 
*device, struct radv_shader_variant *variant); @@ -105,22 +104,6 @@ void radv_DestroyShaderModule( vk_free2(&device->alloc, pAllocator, module); } - -static void -radv_pipeline_destroy(struct radv_device *device, - struct radv_pipeline *pipeline, - const VkAllocationCallbacks* allocator) -{ - for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) - if (pipeline->shaders[i]) - radv_shader_variant_destroy(device, pipeline->shaders[i]); - - if (pipeline->gs_copy_shader) - radv_shader_variant_destroy(device, pipeline->gs_copy_shader); - - vk_free2(&device->alloc, allocator, pipeline); -} - void radv_DestroyPipeline( VkDevice _device, VkPipeline _pipeline, @@ -132,7 +115,11 @@ void radv_DestroyPipeline( if (!_pipeline) return; - radv_pipeline_destroy(device, pipeline, pAllocator); + for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) + if (pipeline->shaders[i]) + radv_shader_variant_destroy(device, pipeline->shaders[i]); + + vk_free2(&device->alloc, pAllocator, pipeline); } @@ -145,7 +132,6 @@ radv_optimize_nir(struct nir_shader *shader) progress = false; NIR_PASS_V(shader, nir_lower_vars_to_ssa); - NIR_PASS_V(shader, nir_lower_64bit_pack); NIR_PASS_V(shader, nir_lower_alu_to_scalar); NIR_PASS_V(shader, nir_lower_phis_to_scalar); @@ -202,35 +188,24 @@ radv_shader_compile_to_nir(struct radv_device *device, assert(data + entry.size <= spec_info->pData + spec_info->dataSize); spec_entries[i].id = spec_info->pMapEntries[i].constantID; - if (spec_info->dataSize == 8) - spec_entries[i].data64 = *(const uint64_t *)data; - else - spec_entries[i].data32 = *(const uint32_t *)data; + spec_entries[i].data = *(const uint32_t *)data; } } - const struct nir_spirv_supported_extensions supported_ext = { - .draw_parameters = true, - .float64 = true, - .image_read_without_format = true, - .image_write_without_format = true, - .tessellation = true, - }; + entry_point = spirv_to_nir(spirv, module->size / 4, spec_entries, num_spec_entries, - stage, entrypoint_name, &supported_ext, &nir_options); + stage, entrypoint_name, &nir_options); nir = entry_point->shader; assert(nir->stage == stage); nir_validate_shader(nir); free(spec_entries); - /* We have to lower away local constant initializers right before we - * inline functions. That way they get properly initialized at the top - * of the function and not at the top of its caller. - */ - NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local); - NIR_PASS_V(nir, nir_lower_returns); - NIR_PASS_V(nir, nir_inline_functions); + nir_lower_returns(nir); + nir_validate_shader(nir); + + nir_inline_functions(nir); + nir_validate_shader(nir); /* Pick off the single entrypoint that we want */ foreach_list_typed_safe(nir_function, func, node, &nir->functions) { @@ -240,24 +215,26 @@ radv_shader_compile_to_nir(struct radv_device *device, assert(exec_list_length(&nir->functions) == 1); entry_point->name = ralloc_strdup(entry_point, "main"); - NIR_PASS_V(nir, nir_remove_dead_variables, - nir_var_shader_in | nir_var_shader_out | nir_var_system_value); + nir_remove_dead_variables(nir, nir_var_shader_in); + nir_remove_dead_variables(nir, nir_var_shader_out); + nir_remove_dead_variables(nir, nir_var_system_value); + nir_validate_shader(nir); - /* Now that we've deleted all but the main function, we can go ahead and - * lower the rest of the constant initializers. 
- */ - NIR_PASS_V(nir, nir_lower_constant_initializers, ~0); - NIR_PASS_V(nir, nir_lower_system_values); - NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); + nir_lower_system_values(nir); + nir_validate_shader(nir); } /* Vulkan uses the separate-shader linking model */ - nir->info->separate_shader = true; + nir->info.separate_shader = true; + + // nir = brw_preprocess_nir(compiler, nir); nir_shader_gather_info(nir, entry_point->impl); nir_variable_mode indirect_mask = 0; + // if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput) indirect_mask |= nir_var_shader_in; + // if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp) indirect_mask |= nir_var_local; nir_lower_indirect_derefs(nir, indirect_mask); @@ -280,84 +257,6 @@ radv_shader_compile_to_nir(struct radv_device *device, return nir; } -static const char *radv_get_shader_name(struct radv_shader_variant *var, - gl_shader_stage stage) -{ - switch (stage) { - case MESA_SHADER_VERTEX: return var->info.vs.as_ls ? "Vertex Shader as LS" : var->info.vs.as_es ? "Vertex Shader as ES" : "Vertex Shader as VS"; - case MESA_SHADER_GEOMETRY: return "Geometry Shader"; - case MESA_SHADER_FRAGMENT: return "Pixel Shader"; - case MESA_SHADER_COMPUTE: return "Compute Shader"; - case MESA_SHADER_TESS_CTRL: return "Tessellation Control Shader"; - case MESA_SHADER_TESS_EVAL: return var->info.tes.as_es ? "Tessellation Evaluation Shader as ES" : "Tessellation Evaluation Shader as VS"; - default: - return "Unknown shader"; - }; - -} -static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pipeline *pipeline) -{ - unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256; - struct radv_shader_variant *var; - struct ac_shader_config *conf; - int i; - FILE *file = stderr; - unsigned max_simd_waves = 10; - unsigned lds_per_wave = 0; - - for (i = 0; i < MESA_SHADER_STAGES; i++) { - if (!pipeline->shaders[i]) - continue; - var = pipeline->shaders[i]; - - conf = &var->config; - - if (i == MESA_SHADER_FRAGMENT) { - lds_per_wave = conf->lds_size * lds_increment + - align(var->info.fs.num_interp * 48, lds_increment); - } - - if (conf->num_sgprs) { - if (device->physical_device->rad_info.chip_class >= VI) - max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs); - else - max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs); - } - - if (conf->num_vgprs) - max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs); - - /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD - * that PS can use. 
- */ - if (lds_per_wave) - max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave); - - fprintf(file, "\n%s:\n", - radv_get_shader_name(var, i)); - if (i == MESA_SHADER_FRAGMENT) { - fprintf(file, "*** SHADER CONFIG ***\n" - "SPI_PS_INPUT_ADDR = 0x%04x\n" - "SPI_PS_INPUT_ENA = 0x%04x\n", - conf->spi_ps_input_addr, conf->spi_ps_input_ena); - } - fprintf(file, "*** SHADER STATS ***\n" - "SGPRS: %d\n" - "VGPRS: %d\n" - "Spilled SGPRs: %d\n" - "Spilled VGPRs: %d\n" - "Code Size: %d bytes\n" - "LDS: %d blocks\n" - "Scratch: %d bytes per wave\n" - "Max Waves: %d\n" - "********************\n\n\n", - conf->num_sgprs, conf->num_vgprs, - conf->spilled_sgprs, conf->spilled_vgprs, var->code_size, - conf->lds_size, conf->scratch_bytes_per_wave, - max_simd_waves); - } -} - void radv_shader_variant_destroy(struct radv_device *device, struct radv_shader_variant *variant) { @@ -368,36 +267,54 @@ void radv_shader_variant_destroy(struct radv_device *device, free(variant); } -static void radv_fill_shader_variant(struct radv_device *device, - struct radv_shader_variant *variant, - struct ac_shader_binary *binary, - gl_shader_stage stage) +static +struct radv_shader_variant *radv_shader_variant_create(struct radv_device *device, + struct nir_shader *shader, + struct radv_pipeline_layout *layout, + const union ac_shader_variant_key *key, + void** code_out, + unsigned *code_size_out, + bool dump) { - bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0; - unsigned vgpr_comp_cnt = 0; + struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant)); + enum radeon_family chip_family = device->instance->physicalDevice.rad_info.family; + LLVMTargetMachineRef tm; + if (!variant) + return NULL; + + struct ac_nir_compiler_options options = {0}; + options.layout = layout; + if (key) + options.key = *key; - if (scratch_enabled && !device->llvm_supports_spill) - radv_finishme("shader scratch support only available with LLVM 4.0"); + struct ac_shader_binary binary; - variant->code_size = binary->code_size; - variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) | - S_00B12C_SCRATCH_EN(scratch_enabled); + options.unsafe_math = env_var_as_boolean("RADV_UNSAFE_MATH", false); + options.family = chip_family; + options.chip_class = device->instance->physicalDevice.rad_info.chip_class; + tm = ac_create_target_machine(chip_family); + ac_compile_nir_shader(tm, &binary, &variant->config, + &variant->info, shader, &options, dump); + LLVMDisposeTargetMachine(tm); - switch (stage) { - case MESA_SHADER_TESS_EVAL: - vgpr_comp_cnt = 3; - /* fallthrough */ - case MESA_SHADER_TESS_CTRL: - variant->rsrc2 |= S_00B42C_OC_LDS_EN(1); - break; + bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0; + unsigned vgpr_comp_cnt = 0; + + if (scratch_enabled) + radv_finishme("shader scratch space"); + switch (shader->stage) { case MESA_SHADER_VERTEX: - case MESA_SHADER_GEOMETRY: + variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) | + S_00B12C_SCRATCH_EN(scratch_enabled); vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt; break; case MESA_SHADER_FRAGMENT: + variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) | + S_00B12C_SCRATCH_EN(scratch_enabled); break; case MESA_SHADER_COMPUTE: - variant->rsrc2 |= + variant->rsrc2 = S_00B84C_USER_SGPR(variant->info.num_user_sgprs) | + S_00B84C_SCRATCH_EN(scratch_enabled) | S_00B84C_TGID_X_EN(1) | S_00B84C_TGID_Y_EN(1) | S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) | S_00B84C_TG_SIZE_EN(1) | @@ -414,48 +331,13 @@ static void 
radv_fill_shader_variant(struct radv_device *device, S_00B848_DX10_CLAMP(1) | S_00B848_FLOAT_MODE(variant->config.float_mode); - variant->bo = device->ws->buffer_create(device->ws, binary->code_size, 256, - RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS); + variant->bo = device->ws->buffer_create(device->ws, binary.code_size, 256, + RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS); void *ptr = device->ws->buffer_map(variant->bo); - memcpy(ptr, binary->code, binary->code_size); + memcpy(ptr, binary.code, binary.code_size); device->ws->buffer_unmap(variant->bo); - -} - -static struct radv_shader_variant *radv_shader_variant_create(struct radv_device *device, - struct nir_shader *shader, - struct radv_pipeline_layout *layout, - const union ac_shader_variant_key *key, - void** code_out, - unsigned *code_size_out, - bool dump) -{ - struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant)); - enum radeon_family chip_family = device->physical_device->rad_info.family; - LLVMTargetMachineRef tm; - if (!variant) - return NULL; - - struct ac_nir_compiler_options options = {0}; - options.layout = layout; - if (key) - options.key = *key; - - struct ac_shader_binary binary; - - options.unsafe_math = !!(device->debug_flags & RADV_DEBUG_UNSAFE_MATH); - options.family = chip_family; - options.chip_class = device->physical_device->rad_info.chip_class; - options.supports_spill = device->llvm_supports_spill; - tm = ac_create_target_machine(chip_family, options.supports_spill); - ac_compile_nir_shader(tm, &binary, &variant->config, - &variant->info, shader, &options, dump); - LLVMDisposeTargetMachine(tm); - - radv_fill_shader_variant(device, variant, &binary, shader->stage); - if (code_out) { *code_out = binary.code; *code_size_out = binary.code_size; @@ -470,43 +352,6 @@ static struct radv_shader_variant *radv_shader_variant_create(struct radv_device return variant; } -static struct radv_shader_variant * -radv_pipeline_create_gs_copy_shader(struct radv_pipeline *pipeline, - struct nir_shader *nir, - void** code_out, - unsigned *code_size_out, - bool dump_shader) -{ - struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant)); - enum radeon_family chip_family = pipeline->device->physical_device->rad_info.family; - LLVMTargetMachineRef tm; - if (!variant) - return NULL; - - struct ac_nir_compiler_options options = {0}; - struct ac_shader_binary binary; - options.family = chip_family; - options.chip_class = pipeline->device->physical_device->rad_info.chip_class; - options.supports_spill = pipeline->device->llvm_supports_spill; - tm = ac_create_target_machine(chip_family, options.supports_spill); - ac_create_gs_copy_shader(tm, nir, &binary, &variant->config, &variant->info, &options, dump_shader); - LLVMDisposeTargetMachine(tm); - - radv_fill_shader_variant(pipeline->device, variant, &binary, MESA_SHADER_VERTEX); - - if (code_out) { - *code_out = binary.code; - *code_size_out = binary.code_size; - } else - free(binary.code); - free(binary.config); - free(binary.rodata); - free(binary.global_symbol_offsets); - free(binary.relocs); - free(binary.disasm_string); - variant->ref_count = 1; - return variant; -} static struct radv_shader_variant * radv_pipeline_compile(struct radv_pipeline *pipeline, @@ -516,41 +361,29 @@ radv_pipeline_compile(struct radv_pipeline *pipeline, gl_shader_stage stage, const VkSpecializationInfo *spec_info, struct radv_pipeline_layout *layout, - const union ac_shader_variant_key *key) + const union ac_shader_variant_key *key, + bool dump) { unsigned 
char sha1[20]; - unsigned char gs_copy_sha1[20]; struct radv_shader_variant *variant; nir_shader *nir; void *code = NULL; unsigned code_size = 0; - bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS); if (module->nir) - _mesa_sha1_compute(module->nir->info->name, - strlen(module->nir->info->name), + _mesa_sha1_compute(module->nir->info.name, + strlen(module->nir->info.name), module->sha1); - radv_hash_shader(sha1, module, entrypoint, spec_info, layout, key, 0); - if (stage == MESA_SHADER_GEOMETRY) - radv_hash_shader(gs_copy_sha1, module, entrypoint, spec_info, - layout, key, 1); - - variant = radv_create_shader_variant_from_pipeline_cache(pipeline->device, - cache, - sha1); - - if (stage == MESA_SHADER_GEOMETRY) { - pipeline->gs_copy_shader = - radv_create_shader_variant_from_pipeline_cache( - pipeline->device, - cache, - gs_copy_sha1); - } + radv_hash_shader(sha1, module, entrypoint, spec_info, layout, key); - if (variant && - (stage != MESA_SHADER_GEOMETRY || pipeline->gs_copy_shader)) - return variant; + if (cache) { + variant = radv_create_shader_variant_from_pipeline_cache(pipeline->device, + cache, + sha1); + if (variant) + return variant; + } nir = radv_shader_compile_to_nir(pipeline->device, module, entrypoint, stage, @@ -558,31 +391,12 @@ radv_pipeline_compile(struct radv_pipeline *pipeline, if (nir == NULL) return NULL; - if (!variant) { - variant = radv_shader_variant_create(pipeline->device, nir, - layout, key, &code, - &code_size, dump); - } - - if (stage == MESA_SHADER_GEOMETRY && !pipeline->gs_copy_shader) { - void *gs_copy_code = NULL; - unsigned gs_copy_code_size = 0; - pipeline->gs_copy_shader = radv_pipeline_create_gs_copy_shader( - pipeline, nir, &gs_copy_code, &gs_copy_code_size, dump); - - if (pipeline->gs_copy_shader) { - pipeline->gs_copy_shader = - radv_pipeline_cache_insert_shader(cache, - gs_copy_sha1, - pipeline->gs_copy_shader, - gs_copy_code, - gs_copy_code_size); - } - } + variant = radv_shader_variant_create(pipeline->device, nir, layout, key, + &code, &code_size, dump); if (!module->nir) - ralloc_free(nir); + ralloc_free(nir); - if (variant) + if (variant && cache) variant = radv_pipeline_cache_insert_shader(cache, sha1, variant, code, code_size); @@ -591,173 +405,6 @@ radv_pipeline_compile(struct radv_pipeline *pipeline, return variant; } -static union ac_shader_variant_key -radv_compute_tes_key(bool as_es) -{ - union ac_shader_variant_key key; - memset(&key, 0, sizeof(key)); - key.tes.as_es = as_es; - return key; -} - -static union ac_shader_variant_key -radv_compute_tcs_key(unsigned primitive_mode, unsigned input_vertices) -{ - union ac_shader_variant_key key; - memset(&key, 0, sizeof(key)); - key.tcs.primitive_mode = primitive_mode; - key.tcs.input_vertices = input_vertices; - return key; -} - -static void -radv_tess_pipeline_compile(struct radv_pipeline *pipeline, - struct radv_pipeline_cache *cache, - struct radv_shader_module *tcs_module, - struct radv_shader_module *tes_module, - const char *tcs_entrypoint, - const char *tes_entrypoint, - const VkSpecializationInfo *tcs_spec_info, - const VkSpecializationInfo *tes_spec_info, - struct radv_pipeline_layout *layout, - unsigned input_vertices) -{ - unsigned char tcs_sha1[20], tes_sha1[20]; - struct radv_shader_variant *tes_variant = NULL, *tcs_variant = NULL; - nir_shader *tes_nir, *tcs_nir; - void *tes_code = NULL, *tcs_code = NULL; - unsigned tes_code_size = 0, tcs_code_size = 0; - union ac_shader_variant_key tes_key = radv_compute_tes_key(radv_pipeline_has_gs(pipeline)); - union 
ac_shader_variant_key tcs_key; - bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS); - - if (tes_module->nir) - _mesa_sha1_compute(tes_module->nir->info->name, - strlen(tes_module->nir->info->name), - tes_module->sha1); - radv_hash_shader(tes_sha1, tes_module, tes_entrypoint, tes_spec_info, layout, &tes_key, 0); - - tes_variant = radv_create_shader_variant_from_pipeline_cache(pipeline->device, - cache, - tes_sha1); - - if (tes_variant) { - tcs_key = radv_compute_tcs_key(tes_variant->info.tes.primitive_mode, input_vertices); - - if (tcs_module->nir) - _mesa_sha1_compute(tcs_module->nir->info->name, - strlen(tcs_module->nir->info->name), - tcs_module->sha1); - - radv_hash_shader(tcs_sha1, tcs_module, tcs_entrypoint, tcs_spec_info, layout, &tcs_key, 0); - - tcs_variant = radv_create_shader_variant_from_pipeline_cache(pipeline->device, - cache, - tcs_sha1); - } - - if (tcs_variant && tes_variant) { - pipeline->shaders[MESA_SHADER_TESS_CTRL] = tcs_variant; - pipeline->shaders[MESA_SHADER_TESS_EVAL] = tes_variant; - return; - } - - tes_nir = radv_shader_compile_to_nir(pipeline->device, - tes_module, tes_entrypoint, MESA_SHADER_TESS_EVAL, - tes_spec_info, dump); - if (tes_nir == NULL) - return; - - tcs_nir = radv_shader_compile_to_nir(pipeline->device, - tcs_module, tcs_entrypoint, MESA_SHADER_TESS_CTRL, - tcs_spec_info, dump); - if (tcs_nir == NULL) - return; - - nir_lower_tes_patch_vertices(tes_nir, - tcs_nir->info->tess.tcs_vertices_out); - - tes_variant = radv_shader_variant_create(pipeline->device, tes_nir, - layout, &tes_key, &tes_code, - &tes_code_size, dump); - - tcs_key = radv_compute_tcs_key(tes_nir->info->tess.primitive_mode, input_vertices); - if (tcs_module->nir) - _mesa_sha1_compute(tcs_module->nir->info->name, - strlen(tcs_module->nir->info->name), - tcs_module->sha1); - - radv_hash_shader(tcs_sha1, tcs_module, tcs_entrypoint, tcs_spec_info, layout, &tcs_key, 0); - - tcs_variant = radv_shader_variant_create(pipeline->device, tcs_nir, - layout, &tcs_key, &tcs_code, - &tcs_code_size, dump); - - if (!tes_module->nir) - ralloc_free(tes_nir); - - if (!tcs_module->nir) - ralloc_free(tcs_nir); - - if (tes_variant) - tes_variant = radv_pipeline_cache_insert_shader(cache, tes_sha1, tes_variant, - tes_code, tes_code_size); - - if (tcs_variant) - tcs_variant = radv_pipeline_cache_insert_shader(cache, tcs_sha1, tcs_variant, - tcs_code, tcs_code_size); - - if (tes_code) - free(tes_code); - if (tcs_code) - free(tcs_code); - pipeline->shaders[MESA_SHADER_TESS_CTRL] = tcs_variant; - pipeline->shaders[MESA_SHADER_TESS_EVAL] = tes_variant; - return; -} - -static VkResult -radv_pipeline_scratch_init(struct radv_device *device, - struct radv_pipeline *pipeline) -{ - unsigned scratch_bytes_per_wave = 0; - unsigned max_waves = 0; - unsigned min_waves = 1; - - for (int i = 0; i < MESA_SHADER_STAGES; ++i) { - if (pipeline->shaders[i]) { - unsigned max_stage_waves = device->scratch_waves; - - scratch_bytes_per_wave = MAX2(scratch_bytes_per_wave, - pipeline->shaders[i]->config.scratch_bytes_per_wave); - - max_stage_waves = MIN2(max_stage_waves, - 4 * device->physical_device->rad_info.num_good_compute_units * - (256 / pipeline->shaders[i]->config.num_vgprs)); - max_waves = MAX2(max_waves, max_stage_waves); - } - } - - if (pipeline->shaders[MESA_SHADER_COMPUTE]) { - unsigned group_size = pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[0] * - pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[1] * - pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[2]; - min_waves 
= MAX2(min_waves, round_up_u32(group_size, 64)); - } - - if (scratch_bytes_per_wave) - max_waves = MIN2(max_waves, 0xffffffffu / scratch_bytes_per_wave); - - if (scratch_bytes_per_wave && max_waves < min_waves) { - /* Not really true at this moment, but will be true on first - * execution. Avoid having hanging shaders. */ - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave; - pipeline->max_waves = max_waves; - return VK_SUCCESS; -} - static uint32_t si_translate_blend_function(VkBlendOp op) { switch (op) { @@ -1021,6 +668,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline, if (blend_mrt0_is_dual_src) col_format |= (col_format & 0xf) << 4; + if (!col_format) + col_format |= V_028714_SPI_SHADER_32_R; blend->spi_shader_col_format = col_format; } @@ -1259,7 +908,7 @@ radv_pipeline_init_raster_state(struct radv_pipeline *pipeline, S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | S_0286D4_PNT_SPRITE_TOP_1(0); // vulkan is top to bottom - 1.0 at bottom - + raster->pa_cl_vs_out_cntl = S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1); raster->pa_cl_clip_cntl = S_028810_PS_UCP_MODE(3) | S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions. S_028810_ZCLIP_NEAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) | @@ -1292,19 +941,11 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline, const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState; struct radv_blend_state *blend = &pipeline->graphics.blend; struct radv_multisample_state *ms = &pipeline->graphics.ms; - unsigned num_tile_pipes = pipeline->device->physical_device->rad_info.num_tile_pipes; + unsigned num_tile_pipes = pipeline->device->instance->physicalDevice.rad_info.num_tile_pipes; int ps_iter_samples = 1; uint32_t mask = 0xffff; - if (vkms) - ms->num_samples = vkms->rasterizationSamples; - else - ms->num_samples = 1; - - if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.force_persample) { - ps_iter_samples = ms->num_samples; - } - + ms->num_samples = vkms->rasterizationSamples; ms->pa_sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1); ms->pa_sc_aa_config = 0; ms->db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) | @@ -1320,8 +961,8 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline, EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1); - if (ms->num_samples > 1) { - unsigned log_samples = util_logbase2(ms->num_samples); + if (vkms->rasterizationSamples > 1) { + unsigned log_samples = util_logbase2(vkms->rasterizationSamples); unsigned log_ps_iter_samples = util_logbase2(util_next_power_of_two(ps_iter_samples)); ms->pa_sc_mode_cntl_0 = S_028A48_MSAA_ENABLE(1); ms->pa_sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1); /* CM_R_028BDC_PA_SC_LINE_CNTL */ @@ -1335,40 +976,17 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline, ms->pa_sc_mode_cntl_1 |= EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1); } - if (vkms) { - if (vkms->alphaToCoverageEnable) - blend->db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1); + if (vkms->alphaToCoverageEnable) + blend->db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1); - if (vkms->pSampleMask) - mask = vkms->pSampleMask[0] & 0xffff; + if (vkms->pSampleMask) { + mask = vkms->pSampleMask[0] & 0xffff; } ms->pa_sc_aa_mask[0] = mask | (mask << 16); ms->pa_sc_aa_mask[1] = mask | (mask << 16); } -static bool -radv_prim_can_use_guardband(enum VkPrimitiveTopology topology) -{ - switch (topology) { - case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: - 
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: - case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: - case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: - case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: - return false; - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: - case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: - return true; - default: - unreachable("unhandled primitive type"); - } -} - static uint32_t si_translate_prim(enum VkPrimitiveTopology topology) { @@ -1402,29 +1020,6 @@ si_translate_prim(enum VkPrimitiveTopology topology) } static uint32_t -si_conv_gl_prim_to_gs_out(unsigned gl_prim) -{ - switch (gl_prim) { - case 0: /* GL_POINTS */ - return V_028A6C_OUTPRIM_TYPE_POINTLIST; - case 1: /* GL_LINES */ - case 3: /* GL_LINE_STRIP */ - case 0xA: /* GL_LINE_STRIP_ADJACENCY_ARB */ - case 0x8E7A: /* GL_ISOLINES */ - return V_028A6C_OUTPRIM_TYPE_LINESTRIP; - - case 4: /* GL_TRIANGLES */ - case 0xc: /* GL_TRIANGLES_ADJACENCY_ARB */ - case 5: /* GL_TRIANGLE_STRIP */ - case 7: /* GL_QUADS */ - return V_028A6C_OUTPRIM_TYPE_TRISTRIP; - default: - assert(0); - return 0; - } -} - -static uint32_t si_conv_prim_to_gs_out(enum VkPrimitiveTopology topology) { switch (topology) { @@ -1592,7 +1187,7 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, } static union ac_shader_variant_key -radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es, bool as_ls) +radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo) { union ac_shader_variant_key key; const VkPipelineVertexInputStateCreateInfo *input_state = @@ -1600,8 +1195,6 @@ radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es, memset(&key, 0, sizeof(key)); key.vs.instance_rate_inputs = 0; - key.vs.as_es = as_es; - key.vs.as_ls = as_ls; for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) { unsigned binding; @@ -1612,334 +1205,6 @@ radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es, return key; } -static void -calculate_gs_ring_sizes(struct radv_pipeline *pipeline) -{ - struct radv_device *device = pipeline->device; - unsigned num_se = device->physical_device->rad_info.max_se; - unsigned wave_size = 64; - unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */ - unsigned gs_vertex_reuse = 16 * num_se; /* GS_VERTEX_REUSE register (per SE) */ - unsigned alignment = 256 * num_se; - /* The maximum size is 63.999 MB per SE. */ - unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se; - struct ac_shader_variant_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info; - struct ac_es_output_info *es_info = radv_pipeline_has_tess(pipeline) ? - &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.es_info : - &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.es_info; - - /* Calculate the minimum size. */ - unsigned min_esgs_ring_size = align(es_info->esgs_itemsize * gs_vertex_reuse * - wave_size, alignment); - /* These are recommended sizes, not minimum sizes. 
*/ - unsigned esgs_ring_size = max_gs_waves * 2 * wave_size * - es_info->esgs_itemsize * gs_info->gs.vertices_in; - unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size * - gs_info->gs.max_gsvs_emit_size * 1; // no streams in VK (gs->max_gs_stream + 1); - - min_esgs_ring_size = align(min_esgs_ring_size, alignment); - esgs_ring_size = align(esgs_ring_size, alignment); - gsvs_ring_size = align(gsvs_ring_size, alignment); - - pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size); - pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size); -} - -static void si_multiwave_lds_size_workaround(struct radv_device *device, - unsigned *lds_size) -{ - /* SPI barrier management bug: - * Make sure we have at least 4k of LDS in use to avoid the bug. - * It applies to workgroup sizes of more than one wavefront. - */ - if (device->physical_device->rad_info.family == CHIP_BONAIRE || - device->physical_device->rad_info.family == CHIP_KABINI || - device->physical_device->rad_info.family == CHIP_MULLINS) - *lds_size = MAX2(*lds_size, 8); -} - -static void -calculate_tess_state(struct radv_pipeline *pipeline, - const VkGraphicsPipelineCreateInfo *pCreateInfo) -{ - unsigned num_tcs_input_cp = pCreateInfo->pTessellationState->patchControlPoints; - unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs; - unsigned num_tcs_patch_outputs; - unsigned input_vertex_size, output_vertex_size, pervertex_output_patch_size; - unsigned input_patch_size, output_patch_size, output_patch0_offset; - unsigned lds_size, hardware_lds_size; - unsigned perpatch_output_offset; - unsigned num_patches; - struct radv_tessellation_state *tess = &pipeline->graphics.tess; - - /* This calculates how shader inputs and outputs among VS, TCS, and TES - * are laid out in LDS. */ - num_tcs_inputs = util_last_bit64(pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.outputs_written); - - num_tcs_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.outputs_written); //tcs->outputs_written - num_tcs_output_cp = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.tcs_vertices_out; //TCS VERTICES OUT - num_tcs_patch_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.patch_outputs_written); - - /* Ensure that we only need one wave per SIMD so we don't need to check - * resource usage. Also ensures that the number of tcs in and out - * vertices per threadgroup are at most 256. - */ - input_vertex_size = num_tcs_inputs * 16; - output_vertex_size = num_tcs_outputs * 16; - - input_patch_size = num_tcs_input_cp * input_vertex_size; - - pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size; - output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16; - /* Ensure that we only need one wave per SIMD so we don't need to check - * resource usage. Also ensures that the number of tcs in and out - * vertices per threadgroup are at most 256. - */ - num_patches = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) * 4; - - /* Make sure that the data fits in LDS. This assumes the shaders only - * use LDS for the inputs and outputs. - */ - hardware_lds_size = pipeline->device->physical_device->rad_info.chip_class >= CIK ? 
65536 : 32768; - num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size)); - - /* Make sure the output data fits in the offchip buffer */ - num_patches = MIN2(num_patches, - (pipeline->device->tess_offchip_block_dw_size * 4) / - output_patch_size); - - /* Not necessary for correctness, but improves performance. The - * specific value is taken from the proprietary driver. - */ - num_patches = MIN2(num_patches, 40); - - /* SI bug workaround - limit LS-HS threadgroups to only one wave. */ - if (pipeline->device->physical_device->rad_info.chip_class == SI) { - unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp); - num_patches = MIN2(num_patches, one_wave); - } - - output_patch0_offset = input_patch_size * num_patches; - perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size; - - lds_size = output_patch0_offset + output_patch_size * num_patches; - - if (pipeline->device->physical_device->rad_info.chip_class >= CIK) { - assert(lds_size <= 65536); - lds_size = align(lds_size, 512) / 512; - } else { - assert(lds_size <= 32768); - lds_size = align(lds_size, 256) / 256; - } - si_multiwave_lds_size_workaround(pipeline->device, &lds_size); - - tess->lds_size = lds_size; - - tess->tcs_in_layout = (input_patch_size / 4) | - ((input_vertex_size / 4) << 13); - tess->tcs_out_layout = (output_patch_size / 4) | - ((output_vertex_size / 4) << 13); - tess->tcs_out_offsets = (output_patch0_offset / 16) | - ((perpatch_output_offset / 16) << 16); - tess->offchip_layout = (pervertex_output_patch_size * num_patches << 16) | - (num_tcs_output_cp << 9) | num_patches; - - tess->ls_hs_config = S_028B58_NUM_PATCHES(num_patches) | - S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) | - S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp); - tess->num_patches = num_patches; - tess->num_tcs_input_cp = num_tcs_input_cp; - - struct radv_shader_variant *tes = pipeline->shaders[MESA_SHADER_TESS_EVAL]; - unsigned type = 0, partitioning = 0, topology = 0, distribution_mode = 0; - - switch (tes->info.tes.primitive_mode) { - case GL_TRIANGLES: - type = V_028B6C_TESS_TRIANGLE; - break; - case GL_QUADS: - type = V_028B6C_TESS_QUAD; - break; - case GL_ISOLINES: - type = V_028B6C_TESS_ISOLINE; - break; - } - - switch (tes->info.tes.spacing) { - case TESS_SPACING_EQUAL: - partitioning = V_028B6C_PART_INTEGER; - break; - case TESS_SPACING_FRACTIONAL_ODD: - partitioning = V_028B6C_PART_FRAC_ODD; - break; - case TESS_SPACING_FRACTIONAL_EVEN: - partitioning = V_028B6C_PART_FRAC_EVEN; - break; - default: - break; - } - - if (tes->info.tes.point_mode) - topology = V_028B6C_OUTPUT_POINT; - else if (tes->info.tes.primitive_mode == GL_ISOLINES) - topology = V_028B6C_OUTPUT_LINE; - else if (tes->info.tes.ccw) - topology = V_028B6C_OUTPUT_TRIANGLE_CW; - else - topology = V_028B6C_OUTPUT_TRIANGLE_CCW; - - if (pipeline->device->has_distributed_tess) { - if (pipeline->device->physical_device->rad_info.family == CHIP_FIJI || - pipeline->device->physical_device->rad_info.family >= CHIP_POLARIS10) - distribution_mode = V_028B6C_DISTRIBUTION_MODE_TRAPEZOIDS; - else - distribution_mode = V_028B6C_DISTRIBUTION_MODE_DONUTS; - } else - distribution_mode = V_028B6C_DISTRIBUTION_MODE_NO_DIST; - - tess->tf_param = S_028B6C_TYPE(type) | - S_028B6C_PARTITIONING(partitioning) | - S_028B6C_TOPOLOGY(topology) | - S_028B6C_DISTRIBUTION_MODE(distribution_mode); -} - -static const struct radv_prim_vertex_count prim_size_table[] = { - [V_008958_DI_PT_NONE] = {0, 0}, - [V_008958_DI_PT_POINTLIST] = {1, 1}, - 
[V_008958_DI_PT_LINELIST] = {2, 2}, - [V_008958_DI_PT_LINESTRIP] = {2, 1}, - [V_008958_DI_PT_TRILIST] = {3, 3}, - [V_008958_DI_PT_TRIFAN] = {3, 1}, - [V_008958_DI_PT_TRISTRIP] = {3, 1}, - [V_008958_DI_PT_LINELIST_ADJ] = {4, 4}, - [V_008958_DI_PT_LINESTRIP_ADJ] = {4, 1}, - [V_008958_DI_PT_TRILIST_ADJ] = {6, 6}, - [V_008958_DI_PT_TRISTRIP_ADJ] = {6, 2}, - [V_008958_DI_PT_RECTLIST] = {3, 3}, - [V_008958_DI_PT_LINELOOP] = {2, 1}, - [V_008958_DI_PT_POLYGON] = {3, 1}, - [V_008958_DI_PT_2D_TRI_STRIP] = {0, 0}, -}; - -static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs) -{ - unsigned gs_max_vert_out = gs->info.gs.vertices_out; - unsigned cut_mode; - - if (gs_max_vert_out <= 128) { - cut_mode = V_028A40_GS_CUT_128; - } else if (gs_max_vert_out <= 256) { - cut_mode = V_028A40_GS_CUT_256; - } else if (gs_max_vert_out <= 512) { - cut_mode = V_028A40_GS_CUT_512; - } else { - assert(gs_max_vert_out <= 1024); - cut_mode = V_028A40_GS_CUT_1024; - } - - return S_028A40_MODE(V_028A40_GS_SCENARIO_G) | - S_028A40_CUT_MODE(cut_mode)| - S_028A40_ES_WRITE_OPTIMIZE(1) | - S_028A40_GS_WRITE_OPTIMIZE(1); -} - -static void calculate_pa_cl_vs_out_cntl(struct radv_pipeline *pipeline) -{ - struct radv_shader_variant *vs; - vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : (radv_pipeline_has_tess(pipeline) ? pipeline->shaders[MESA_SHADER_TESS_EVAL] : pipeline->shaders[MESA_SHADER_VERTEX]); - - struct ac_vs_output_info *outinfo = &vs->info.vs.outinfo; - - unsigned clip_dist_mask, cull_dist_mask, total_mask; - clip_dist_mask = outinfo->clip_dist_mask; - cull_dist_mask = outinfo->cull_dist_mask; - total_mask = clip_dist_mask | cull_dist_mask; - - bool misc_vec_ena = outinfo->writes_pointsize || - outinfo->writes_layer || - outinfo->writes_viewport_index; - pipeline->graphics.pa_cl_vs_out_cntl = - S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) | - S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) | - S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | - S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | - S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) | - S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) | - S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | - cull_dist_mask << 8 | - clip_dist_mask; - -} -static void calculate_ps_inputs(struct radv_pipeline *pipeline) -{ - struct radv_shader_variant *ps, *vs; - struct ac_vs_output_info *outinfo; - - ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; - vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : (radv_pipeline_has_tess(pipeline) ? 
pipeline->shaders[MESA_SHADER_TESS_EVAL] : pipeline->shaders[MESA_SHADER_VERTEX]); - - outinfo = &vs->info.vs.outinfo; - - unsigned ps_offset = 0; - if (ps->info.fs.has_pcoord) { - unsigned val; - val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20); - pipeline->graphics.ps_input_cntl[ps_offset] = val; - ps_offset++; - } - - if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) { - unsigned vs_offset, flat_shade; - unsigned val; - vs_offset = outinfo->prim_id_output; - flat_shade = true; - val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade); - pipeline->graphics.ps_input_cntl[ps_offset] = val; - ++ps_offset; - } - - if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) { - unsigned vs_offset, flat_shade; - unsigned val; - vs_offset = outinfo->layer_output; - flat_shade = true; - val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade); - pipeline->graphics.ps_input_cntl[ps_offset] = val; - ++ps_offset; - } - - for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) { - unsigned vs_offset, flat_shade; - unsigned val; - - if (!(ps->info.fs.input_mask & (1u << i))) - continue; - - if (!(outinfo->export_mask & (1u << i))) { - pipeline->graphics.ps_input_cntl[ps_offset] = S_028644_OFFSET(0x20); - ++ps_offset; - continue; - } - - vs_offset = util_bitcount(outinfo->export_mask & ((1u << i) - 1)); - if (outinfo->prim_id_output != 0xffffffff) { - if (vs_offset >= outinfo->prim_id_output) - vs_offset++; - } - if (outinfo->layer_output != 0xffffffff) { - if (vs_offset >= outinfo->layer_output) - vs_offset++; - } - flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset)); - - val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade); - pipeline->graphics.ps_input_cntl[ps_offset] = val; - ++ps_offset; - } - - pipeline->graphics.ps_input_cntl_num = ps_offset; -} - VkResult radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device, @@ -1949,8 +1214,8 @@ radv_pipeline_init(struct radv_pipeline *pipeline, const VkAllocationCallbacks *alloc) { struct radv_shader_module fs_m = {0}; - VkResult result; + bool dump = getenv("RADV_DUMP_SHADERS"); if (alloc == NULL) alloc = &device->alloc; @@ -1968,62 +1233,24 @@ radv_pipeline_init(struct radv_pipeline *pipeline, radv_pipeline_init_blend_state(pipeline, pCreateInfo, extra); + /* */ if (modules[MESA_SHADER_VERTEX]) { - bool as_es = false; - bool as_ls = false; - if (modules[MESA_SHADER_TESS_CTRL]) - as_ls = true; - else if (modules[MESA_SHADER_GEOMETRY]) - as_es = true; - union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, as_es, as_ls); + union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo); pipeline->shaders[MESA_SHADER_VERTEX] = radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_VERTEX], pStages[MESA_SHADER_VERTEX]->pName, MESA_SHADER_VERTEX, pStages[MESA_SHADER_VERTEX]->pSpecializationInfo, - pipeline->layout, &key); + pipeline->layout, &key, dump); pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_VERTEX); } - if (modules[MESA_SHADER_GEOMETRY]) { - union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, false, false); - - pipeline->shaders[MESA_SHADER_GEOMETRY] = - radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_GEOMETRY], - pStages[MESA_SHADER_GEOMETRY]->pName, - MESA_SHADER_GEOMETRY, - pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo, - pipeline->layout, &key); - - pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_GEOMETRY); - - 
pipeline->graphics.vgt_gs_mode = si_vgt_gs_mode(pipeline->shaders[MESA_SHADER_GEOMETRY]); - } else - pipeline->graphics.vgt_gs_mode = 0; - - if (modules[MESA_SHADER_TESS_EVAL]) { - assert(modules[MESA_SHADER_TESS_CTRL]); - - radv_tess_pipeline_compile(pipeline, - cache, - modules[MESA_SHADER_TESS_CTRL], - modules[MESA_SHADER_TESS_EVAL], - pStages[MESA_SHADER_TESS_CTRL]->pName, - pStages[MESA_SHADER_TESS_EVAL]->pName, - pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo, - pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo, - pipeline->layout, - pCreateInfo->pTessellationState->patchControlPoints); - pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_TESS_EVAL) | - mesa_to_vk_shader_stage(MESA_SHADER_TESS_CTRL); - } - if (!modules[MESA_SHADER_FRAGMENT]) { nir_builder fs_b; nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL); - fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "noop_fs"); + fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "noop_fs"); fs_m.nir = fs_b.shader; modules[MESA_SHADER_FRAGMENT] = &fs_m; } @@ -2040,7 +1267,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline, stage ? stage->pName : "main", MESA_SHADER_FRAGMENT, stage ? stage->pSpecializationInfo : NULL, - pipeline->layout, &key); + pipeline->layout, &key, dump); pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_FRAGMENT); } @@ -2051,95 +1278,12 @@ radv_pipeline_init(struct radv_pipeline *pipeline, radv_pipeline_init_raster_state(pipeline, pCreateInfo); radv_pipeline_init_multisample_state(pipeline, pCreateInfo); pipeline->graphics.prim = si_translate_prim(pCreateInfo->pInputAssemblyState->topology); - pipeline->graphics.can_use_guardband = radv_prim_can_use_guardband(pCreateInfo->pInputAssemblyState->topology); - - if (radv_pipeline_has_gs(pipeline)) { - pipeline->graphics.gs_out = si_conv_gl_prim_to_gs_out(pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim); - pipeline->graphics.can_use_guardband = pipeline->graphics.gs_out == V_028A6C_OUTPRIM_TYPE_TRISTRIP; - } else { - pipeline->graphics.gs_out = si_conv_prim_to_gs_out(pCreateInfo->pInputAssemblyState->topology); - } + pipeline->graphics.gs_out = si_conv_prim_to_gs_out(pCreateInfo->pInputAssemblyState->topology); if (extra && extra->use_rectlist) { pipeline->graphics.prim = V_008958_DI_PT_RECTLIST; pipeline->graphics.gs_out = V_028A6C_OUTPRIM_TYPE_TRISTRIP; - pipeline->graphics.can_use_guardband = true; } pipeline->graphics.prim_restart_enable = !!pCreateInfo->pInputAssemblyState->primitiveRestartEnable; - /* prim vertex count will need TESS changes */ - pipeline->graphics.prim_vertex_count = prim_size_table[pipeline->graphics.prim]; - - /* Ensure that some export memory is always allocated, for two reasons: - * - * 1) Correctness: The hardware ignores the EXEC mask if no export - * memory is allocated, so KILL and alpha test do not work correctly - * without this. - * 2) Performance: Every shader needs at least a NULL export, even when - * it writes no color/depth output. The NULL export instruction - * stalls without this setting. - * - * Don't add this to CB_SHADER_MASK. 
- */ - struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; - if (!pipeline->graphics.blend.spi_shader_col_format) { - if (!ps->info.fs.writes_z && - !ps->info.fs.writes_stencil && - !ps->info.fs.writes_sample_mask) - pipeline->graphics.blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R; - } - - unsigned z_order; - pipeline->graphics.db_shader_control = 0; - if (ps->info.fs.early_fragment_test || !ps->info.fs.writes_memory) - z_order = V_02880C_EARLY_Z_THEN_LATE_Z; - else - z_order = V_02880C_LATE_Z; - - pipeline->graphics.db_shader_control = - S_02880C_Z_EXPORT_ENABLE(ps->info.fs.writes_z) | - S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.fs.writes_stencil) | - S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) | - S_02880C_MASK_EXPORT_ENABLE(ps->info.fs.writes_sample_mask) | - S_02880C_Z_ORDER(z_order) | - S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) | - S_02880C_EXEC_ON_HIER_FAIL(ps->info.fs.writes_memory) | - S_02880C_EXEC_ON_NOOP(ps->info.fs.writes_memory); - - pipeline->graphics.shader_z_format = - ps->info.fs.writes_sample_mask ? V_028710_SPI_SHADER_32_ABGR : - ps->info.fs.writes_stencil ? V_028710_SPI_SHADER_32_GR : - ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R : - V_028710_SPI_SHADER_ZERO; - - calculate_pa_cl_vs_out_cntl(pipeline); - calculate_ps_inputs(pipeline); - - uint32_t stages = 0; - if (radv_pipeline_has_tess(pipeline)) { - stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | - S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1); - - if (radv_pipeline_has_gs(pipeline)) - stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | - S_028B54_GS_EN(1) | - S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); - else - stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS); - } else if (radv_pipeline_has_gs(pipeline)) - stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | - S_028B54_GS_EN(1) | - S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); - pipeline->graphics.vgt_shader_stages_en = stages; - - if (radv_pipeline_has_gs(pipeline)) - calculate_gs_ring_sizes(pipeline); - - if (radv_pipeline_has_tess(pipeline)) { - if (pipeline->graphics.prim == V_008958_DI_PT_PATCH) { - pipeline->graphics.prim_vertex_count.min = pCreateInfo->pTessellationState->patchControlPoints; - pipeline->graphics.prim_vertex_count.incr = 1; - } - calculate_tess_state(pipeline, pCreateInfo); - } const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState; @@ -2175,12 +1319,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline, pipeline->binding_stride[desc->binding] = desc->stride; } - if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) { - radv_dump_pipeline_stats(device, pipeline); - } - - result = radv_pipeline_scratch_init(device, pipeline); - return result; + return VK_SUCCESS; } VkResult @@ -2206,7 +1345,7 @@ radv_graphics_pipeline_create( result = radv_pipeline_init(pipeline, device, cache, pCreateInfo, extra, pAllocator); if (result != VK_SUCCESS) { - radv_pipeline_destroy(device, pipeline, pAllocator); + vk_free2(&device->alloc, pAllocator, pipeline); return result; } @@ -2227,18 +1366,20 @@ VkResult radv_CreateGraphicsPipelines( unsigned i = 0; for (; i < count; i++) { - VkResult r; - r = radv_graphics_pipeline_create(_device, - pipelineCache, - &pCreateInfos[i], - NULL, pAllocator, &pPipelines[i]); - if (r != VK_SUCCESS) { - result = r; - pPipelines[i] = VK_NULL_HANDLE; + result = radv_graphics_pipeline_create(_device, + pipelineCache, + &pCreateInfos[i], + NULL, pAllocator, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + 
radv_DestroyPipeline(_device, pPipelines[j], pAllocator); + } + + return result; } } - return result; + return VK_SUCCESS; } static VkResult radv_compute_pipeline_create( @@ -2252,7 +1393,7 @@ static VkResult radv_compute_pipeline_create( RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache); RADV_FROM_HANDLE(radv_shader_module, module, pCreateInfo->stage.module); struct radv_pipeline *pipeline; - VkResult result; + bool dump = getenv("RADV_DUMP_SHADERS"); pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); @@ -2268,20 +1409,9 @@ static VkResult radv_compute_pipeline_create( pCreateInfo->stage.pName, MESA_SHADER_COMPUTE, pCreateInfo->stage.pSpecializationInfo, - pipeline->layout, NULL); - - - result = radv_pipeline_scratch_init(device, pipeline); - if (result != VK_SUCCESS) { - radv_pipeline_destroy(device, pipeline, pAllocator); - return result; - } + pipeline->layout, NULL, dump); *pPipeline = radv_pipeline_to_handle(pipeline); - - if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) { - radv_dump_pipeline_stats(device, pipeline); - } return VK_SUCCESS; } VkResult radv_CreateComputePipelines( @@ -2296,15 +1426,17 @@ VkResult radv_CreateComputePipelines( unsigned i = 0; for (; i < count; i++) { - VkResult r; - r = radv_compute_pipeline_create(_device, pipelineCache, - &pCreateInfos[i], - pAllocator, &pPipelines[i]); - if (r != VK_SUCCESS) { - result = r; - pPipelines[i] = VK_NULL_HANDLE; + result = radv_compute_pipeline_create(_device, pipelineCache, + &pCreateInfos[i], + pAllocator, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + radv_DestroyPipeline(_device, pPipelines[j], pAllocator); + } + + return result; } } - return result; + return VK_SUCCESS; } diff --git a/lib/mesa/src/amd/vulkan/radv_pipeline_cache.c b/lib/mesa/src/amd/vulkan/radv_pipeline_cache.c index 5f6355f0d..b42935554 100644 --- a/lib/mesa/src/amd/vulkan/radv_pipeline_cache.c +++ b/lib/mesa/src/amd/vulkan/radv_pipeline_cache.c @@ -57,7 +57,7 @@ radv_pipeline_cache_init(struct radv_pipeline_cache *cache, /* We don't consider allocation failure fatal, we just start with a 0-sized * cache. 
*/ if (cache->hash_table == NULL || - (device->debug_flags & RADV_DEBUG_NO_CACHE)) + !env_var_as_boolean("RADV_ENABLE_PIPELINE_CACHE", true)) cache->table_size = 0; else memset(cache->hash_table, 0, byte_size); @@ -88,25 +88,23 @@ radv_hash_shader(unsigned char *hash, struct radv_shader_module *module, const char *entrypoint, const VkSpecializationInfo *spec_info, const struct radv_pipeline_layout *layout, - const union ac_shader_variant_key *key, - uint32_t is_geom_copy_shader) + const union ac_shader_variant_key *key) { - struct mesa_sha1 ctx; + struct mesa_sha1 *ctx; - _mesa_sha1_init(&ctx); + ctx = _mesa_sha1_init(); if (key) - _mesa_sha1_update(&ctx, key, sizeof(*key)); - _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1)); - _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint)); + _mesa_sha1_update(ctx, key, sizeof(*key)); + _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1)); + _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint)); if (layout) - _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); + _mesa_sha1_update(ctx, layout->sha1, sizeof(layout->sha1)); if (spec_info) { - _mesa_sha1_update(&ctx, spec_info->pMapEntries, + _mesa_sha1_update(ctx, spec_info->pMapEntries, spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]); - _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize); + _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize); } - _mesa_sha1_update(&ctx, &is_geom_copy_shader, 4); - _mesa_sha1_final(&ctx, hash); + _mesa_sha1_final(ctx, hash); } @@ -152,10 +150,7 @@ radv_create_shader_variant_from_pipeline_cache(struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1) { - struct cache_entry *entry = NULL; - - if (cache) - entry = radv_pipeline_cache_search(cache, sha1); + struct cache_entry *entry = radv_pipeline_cache_search(cache, sha1); if (!entry) return NULL; @@ -174,7 +169,7 @@ radv_create_shader_variant_from_pipeline_cache(struct radv_device *device, variant->ref_count = 1; variant->bo = device->ws->buffer_create(device->ws, entry->code_size, 256, - RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS); + RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS); void *ptr = device->ws->buffer_map(variant->bo); memcpy(ptr, entry->code, entry->code_size); @@ -263,9 +258,6 @@ radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache, struct radv_shader_variant *variant, const void *code, unsigned code_size) { - if (!cache) - return variant; - pthread_mutex_lock(&cache->mutex); struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1); if (entry) { @@ -311,13 +303,13 @@ struct cache_header { uint32_t device_id; uint8_t uuid[VK_UUID_SIZE]; }; - void radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size) { struct radv_device *device = cache->device; struct cache_header header; + uint8_t uuid[VK_UUID_SIZE]; if (size < sizeof(header)) return; @@ -328,9 +320,10 @@ radv_pipeline_cache_load(struct radv_pipeline_cache *cache, return; if (header.vendor_id != 0x1002) return; - if (header.device_id != device->physical_device->rad_info.pci_id) + if (header.device_id != device->instance->physicalDevice.rad_info.pci_id) return; - if (memcmp(header.uuid, device->physical_device->uuid, VK_UUID_SIZE) != 0) + radv_device_get_cache_uuid(uuid); + if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0) return; char *end = (void *) data + size; @@ -428,8 +421,8 @@ VkResult radv_GetPipelineCacheData( header->header_size = sizeof(*header); header->header_version = 
VK_PIPELINE_CACHE_HEADER_VERSION_ONE; header->vendor_id = 0x1002; - header->device_id = device->physical_device->rad_info.pci_id; - memcpy(header->uuid, device->physical_device->uuid, VK_UUID_SIZE); + header->device_id = device->instance->physicalDevice.rad_info.pci_id; + radv_device_get_cache_uuid(header->uuid); p += header->header_size; struct cache_entry *entry; diff --git a/lib/mesa/src/amd/vulkan/radv_private.h b/lib/mesa/src/amd/vulkan/radv_private.h index 08f53a169..cfdda3654 100644 --- a/lib/mesa/src/amd/vulkan/radv_private.h +++ b/lib/mesa/src/amd/vulkan/radv_private.h @@ -53,7 +53,6 @@ #include "radv_radeon_winsys.h" #include "ac_binary.h" #include "ac_nir_to_llvm.h" -#include "radv_debug.h" #include "radv_descriptor_set.h" #include <llvm-c/TargetMachine.h> @@ -79,29 +78,14 @@ typedef uint32_t xcb_window_t; #define MAX_VIEWPORTS 16 #define MAX_SCISSORS 16 #define MAX_PUSH_CONSTANTS_SIZE 128 -#define MAX_PUSH_DESCRIPTORS 32 #define MAX_DYNAMIC_BUFFERS 16 -#define MAX_SAMPLES_LOG2 4 +#define MAX_IMAGES 8 +#define MAX_SAMPLES_LOG2 4 /* SKL supports 16 samples */ #define NUM_META_FS_KEYS 11 -#define RADV_MAX_DRM_DEVICES 8 #define NUM_DEPTH_CLEAR_PIPELINES 3 -enum radv_mem_heap { - RADV_MEM_HEAP_VRAM, - RADV_MEM_HEAP_VRAM_CPU_ACCESS, - RADV_MEM_HEAP_GTT, - RADV_MEM_HEAP_COUNT -}; - -enum radv_mem_type { - RADV_MEM_TYPE_VRAM, - RADV_MEM_TYPE_GTT_WRITE_COMBINE, - RADV_MEM_TYPE_VRAM_CPU_ACCESS, - RADV_MEM_TYPE_GTT_CACHED, - RADV_MEM_TYPE_COUNT -}; - +#define radv_noreturn __attribute__((__noreturn__)) #define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) static inline uint32_t @@ -189,12 +173,20 @@ radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) __dword &= ~(1 << (b))) #define typed_memcpy(dest, src, count) ({ \ - STATIC_ASSERT(sizeof(*src) == sizeof(*dest)); \ + static_assert(sizeof(*src) == sizeof(*dest), ""); \ memcpy((dest), (src), (count) * sizeof(*(src))); \ }) #define zero(x) (memset(&(x), 0, sizeof(x))) +/* Define no kernel as 1, since that's an illegal offset for a kernel */ +#define NO_KERNEL 1 + +struct radv_common { + VkStructureType sType; + const void* pNext; +}; + /* Whenever we generate an error, pass it through this function. Useful for * debugging, where we can break on it. Only call at error site, not when * propagating errors. Might be useful to plug in a stack trace here. @@ -219,13 +211,7 @@ void radv_loge_v(const char *format, va_list va); * Print a FINISHME message, including its source location. */ #define radv_finishme(format, ...) \ - do { \ - static bool reported = false; \ - if (!reported) { \ - __radv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \ - reported = true; \ - } \ - } while (0) + __radv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); /* A non-fatal assert. Useful for debugging. */ #ifdef DEBUG @@ -237,6 +223,9 @@ void radv_loge_v(const char *format, va_list va); #define radv_assert(x) #endif +void radv_abortf(const char *format, ...) 
radv_noreturn radv_printflike(1, 2); +void radv_abortfv(const char *format, va_list va) radv_noreturn; + #define stub_return(v) \ do { \ radv_finishme("stub %s", __func__); \ @@ -249,12 +238,10 @@ void radv_loge_v(const char *format, va_list va); return; \ } while (0) +void *radv_resolve_entrypoint(uint32_t index); void *radv_lookup_entrypoint(const char *name); -struct radv_extensions { - VkExtensionProperties *ext_array; - uint32_t num_ext; -}; +extern struct radv_dispatch_table dtable; struct radv_physical_device { VK_LOADER_DATA _loader_data; @@ -263,13 +250,15 @@ struct radv_physical_device { struct radeon_winsys *ws; struct radeon_info rad_info; + uint32_t chipset_id; char path[20]; const char * name; - uint8_t uuid[VK_UUID_SIZE]; + uint64_t aperture_size; + int cmd_parser_version; + uint32_t pci_vendor_id; + uint32_t pci_device_id; - int local_fd; struct wsi_device wsi_device; - struct radv_extensions extensions; }; struct radv_instance { @@ -279,9 +268,7 @@ struct radv_instance { uint32_t apiVersion; int physicalDeviceCount; - struct radv_physical_device physicalDevices[RADV_MAX_DRM_DEVICES]; - - uint64_t debug_flags; + struct radv_physical_device physicalDevice; }; VkResult radv_init_wsi(struct radv_physical_device *physical_device); @@ -337,9 +324,11 @@ struct radv_meta_state { VkRenderPass render_pass[NUM_META_FS_KEYS]; struct radv_pipeline *color_pipelines[NUM_META_FS_KEYS]; - VkRenderPass depthstencil_rp; + VkRenderPass depth_only_rp[NUM_DEPTH_CLEAR_PIPELINES]; struct radv_pipeline *depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES]; + VkRenderPass stencil_only_rp[NUM_DEPTH_CLEAR_PIPELINES]; struct radv_pipeline *stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES]; + VkRenderPass depthstencil_rp[NUM_DEPTH_CLEAR_PIPELINES]; struct radv_pipeline *depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES]; } clear[1 + MAX_SAMPLES_LOG2]; @@ -393,16 +382,6 @@ struct radv_meta_state { VkDescriptorSetLayout img_ds_layout; VkPipeline pipeline; } btoi; - struct { - VkPipelineLayout img_p_layout; - VkDescriptorSetLayout img_ds_layout; - VkPipeline pipeline; - } itoi; - struct { - VkPipelineLayout img_p_layout; - VkDescriptorSetLayout img_ds_layout; - VkPipeline pipeline; - } cleari; struct { VkPipeline pipeline; @@ -438,47 +417,14 @@ struct radv_meta_state { VkPipeline fill_pipeline; VkPipeline copy_pipeline; } buffer; - - struct { - VkDescriptorSetLayout ds_layout; - VkPipelineLayout p_layout; - VkPipeline occlusion_query_pipeline; - VkPipeline pipeline_statistics_query_pipeline; - } query; }; -/* queue types */ -#define RADV_QUEUE_GENERAL 0 -#define RADV_QUEUE_COMPUTE 1 -#define RADV_QUEUE_TRANSFER 2 - -#define RADV_MAX_QUEUE_FAMILIES 3 - -enum ring_type radv_queue_family_to_ring(int f); - struct radv_queue { VK_LOADER_DATA _loader_data; + struct radv_device * device; - struct radeon_winsys_ctx *hw_ctx; - int queue_family_index; - int queue_idx; - - uint32_t scratch_size; - uint32_t compute_scratch_size; - uint32_t esgs_ring_size; - uint32_t gsvs_ring_size; - bool has_tess_rings; - bool has_sample_positions; - - struct radeon_winsys_bo *scratch_bo; - struct radeon_winsys_bo *descriptor_bo; - struct radeon_winsys_bo *compute_scratch_bo; - struct radeon_winsys_bo *esgs_ring_bo; - struct radeon_winsys_bo *gsvs_ring_bo; - struct radeon_winsys_bo *tess_factor_ring_bo; - struct radeon_winsys_bo *tess_offchip_ring_bo; - struct radeon_winsys_cs *initial_preamble_cs; - struct radeon_winsys_cs *continue_preamble_cs; + + struct radv_state_pool * pool; }; struct radv_device { @@ -488,22 +434,14 @@ struct 
radv_device { struct radv_instance * instance; struct radeon_winsys *ws; + struct radeon_winsys_ctx *hw_ctx; struct radv_meta_state meta_state; + struct radv_queue queue; + struct radeon_winsys_cs *empty_cs; - struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES]; - int queue_count[RADV_MAX_QUEUE_FAMILIES]; - struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES]; - struct radeon_winsys_cs *flush_cs[RADV_MAX_QUEUE_FAMILIES]; - struct radeon_winsys_cs *flush_shader_cs[RADV_MAX_QUEUE_FAMILIES]; - uint64_t debug_flags; - - bool llvm_supports_spill; - bool has_distributed_tess; - uint32_t tess_offchip_block_dw_size; - uint32_t scratch_waves; - - uint32_t gs_table_depth; + bool allow_fast_clears; + bool allow_dcc; /* MSAA sample locations. * The first index is the sample index. @@ -513,25 +451,12 @@ struct radv_device { float sample_locations_4x[4][2]; float sample_locations_8x[8][2]; float sample_locations_16x[16][2]; - - /* CIK and later */ - uint32_t gfx_init_size_dw; - struct radeon_winsys_bo *gfx_init; - - struct radeon_winsys_bo *trace_bo; - uint32_t *trace_id_ptr; - - struct radv_physical_device *physical_device; - - /* Backup in-memory cache to be used if the app doesn't provide one */ - struct radv_pipeline_cache * mem_cache; }; +void radv_device_get_cache_uuid(void *uuid); + struct radv_device_memory { struct radeon_winsys_bo *bo; - /* for dedicated allocations */ - struct radv_image *image; - struct radv_buffer *buffer; uint32_t type_index; VkDeviceSize map_size; void * map; @@ -545,62 +470,35 @@ struct radv_descriptor_range { struct radv_descriptor_set { const struct radv_descriptor_set_layout *layout; + struct list_head descriptor_pool; uint32_t size; + struct radv_buffer_view *buffer_views; struct radeon_winsys_bo *bo; uint64_t va; uint32_t *mapped_ptr; struct radv_descriptor_range *dynamic_descriptors; - - struct list_head vram_list; - struct radeon_winsys_bo *descriptors[0]; }; -struct radv_push_descriptor_set -{ - struct radv_descriptor_set set; - uint32_t capacity; +struct radv_descriptor_pool_free_node { + int next; + uint32_t offset; + uint32_t size; }; struct radv_descriptor_pool { + struct list_head descriptor_sets; + struct radeon_winsys_bo *bo; uint8_t *mapped_ptr; uint64_t current_offset; uint64_t size; - struct list_head vram_list; -}; - -struct radv_descriptor_update_template_entry { - VkDescriptorType descriptor_type; - - /* The number of descriptors to update */ - uint32_t descriptor_count; - - /* Into mapped_ptr or dynamic_descriptors, in units of the respective array */ - uint32_t dst_offset; - - /* In dwords. Not valid/used for dynamic descriptors */ - uint32_t dst_stride; - - uint32_t buffer_offset; - uint32_t buffer_count; - - /* Only valid for combined image samplers and samplers */ - uint16_t has_sampler; - - /* In bytes */ - size_t src_offset; - size_t src_stride; - - /* For push descriptors */ - const uint32_t *immutable_samplers; -}; - -struct radv_descriptor_update_template { - uint32_t entry_count; - struct radv_descriptor_update_template_entry entry[0]; + int free_list; + int full_list; + uint32_t max_sets; + struct radv_descriptor_pool_free_node free_nodes[]; }; struct radv_buffer { @@ -608,7 +506,6 @@ struct radv_buffer { VkDeviceSize size; VkBufferUsageFlags usage; - VkBufferCreateFlags flags; /* Set when bound */ struct radeon_winsys_bo * bo; @@ -641,18 +538,16 @@ enum radv_cmd_flush_bits { RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2, /* Used by everything except CB/DB, can be bypassed (SLC=1). 
Other names: TC L2 */ RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3, - /* Same as above, but only writes back and doesn't invalidate */ - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4, /* Framebuffer caches */ - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5, - RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6, - RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7, - RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8, + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 4, + RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 5, + RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 6, + RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 7, /* Engine synchronization. */ - RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9, - RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10, - RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11, - RADV_CMD_FLAG_VGT_FLUSH = 1 << 12, + RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 8, + RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 9, + RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 10, + RADV_CMD_FLAG_VGT_FLUSH = 1 << 11, RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | @@ -725,9 +620,8 @@ struct radv_attachment_state { struct radv_cmd_state { uint32_t vb_dirty; - radv_cmd_dirty_mask_t dirty; bool vertex_descriptors_dirty; - bool push_descriptors_dirty; + radv_cmd_dirty_mask_t dirty; struct radv_pipeline * pipeline; struct radv_pipeline * emitted_pipeline; @@ -744,21 +638,14 @@ struct radv_cmd_state { struct radv_buffer * index_buffer; uint32_t index_type; uint32_t index_offset; - int32_t last_primitive_reset_en; uint32_t last_primitive_reset_index; enum radv_cmd_flush_bits flush_bits; unsigned active_occlusion_queries; float offset_scale; - uint32_t descriptors_dirty; - uint32_t trace_id; - uint32_t last_ia_multi_vgt_param; }; - struct radv_cmd_pool { VkAllocationCallbacks alloc; struct list_head cmd_buffers; - struct list_head free_cmd_buffers; - uint32_t queue_family_index; }; struct radv_cmd_buffer_upload { @@ -781,53 +668,25 @@ struct radv_cmd_buffer { VkCommandBufferLevel level; struct radeon_winsys_cs *cs; struct radv_cmd_state state; - uint32_t queue_family_index; uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE]; - uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS]; + uint32_t dynamic_buffers[16 * MAX_DYNAMIC_BUFFERS]; VkShaderStageFlags push_constant_stages; - struct radv_push_descriptor_set push_descriptors; - struct radv_descriptor_set meta_push_descriptors; struct radv_cmd_buffer_upload upload; bool record_fail; - - uint32_t scratch_size_needed; - uint32_t compute_scratch_size_needed; - uint32_t esgs_ring_size_needed; - uint32_t gsvs_ring_size_needed; - bool tess_rings_needed; - bool sample_positions_needed; - - int ring_offsets_idx; /* just used for verification */ }; struct radv_image; -bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer); - -void si_init_compute(struct radv_cmd_buffer *cmd_buffer); -void si_init_config(struct radv_cmd_buffer *cmd_buffer); - -void cik_create_gfx_config(struct radv_device *device); - +void si_init_config(struct radv_physical_device *physical_device, + struct radv_cmd_buffer *cmd_buffer); void si_write_viewport(struct radeon_winsys_cs *cs, int first_vp, int count, const VkViewport *viewports); void si_write_scissors(struct radeon_winsys_cs *cs, int first, - int count, const VkRect2D *scissors, - const VkViewport *viewports, bool can_use_guardband); -uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, - bool instanced_draw, bool indirect_draw, - uint32_t draw_vertex_count); -void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, - enum chip_class chip_class, - 
bool is_mec, - enum radv_cmd_flush_bits flush_bits); -void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, - enum chip_class chip_class, - bool is_mec, - enum radv_cmd_flush_bits flush_bits); + int count, const VkRect2D *scissors); +uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer); void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer); void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, @@ -870,10 +729,7 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer, void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t value); -void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer); -bool radv_get_memory_fd(struct radv_device *device, - struct radv_device_memory *memory, - int *pFD); + /* * Takes x,y,z as exact numbers of invocations, instead of blocks. * @@ -907,8 +763,7 @@ radv_hash_shader(unsigned char *hash, struct radv_shader_module *module, const char *entrypoint, const VkSpecializationInfo *spec_info, const struct radv_pipeline_layout *layout, - const union ac_shader_variant_key *key, - uint32_t is_geom_copy_shader); + const union ac_shader_variant_key *key); static inline gl_shader_stage vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) @@ -939,7 +794,6 @@ struct radv_shader_variant { struct ac_shader_variant_info info; unsigned rsrc1; unsigned rsrc2; - uint32_t code_size; }; struct radv_depth_stencil_state { @@ -964,6 +818,7 @@ unsigned radv_format_meta_fs_key(VkFormat format); struct radv_raster_state { uint32_t pa_cl_clip_cntl; + uint32_t pa_cl_vs_out_cntl; uint32_t spi_interp_control; uint32_t pa_su_point_size; uint32_t pa_su_point_minmax; @@ -982,23 +837,6 @@ struct radv_multisample_state { unsigned num_samples; }; -struct radv_prim_vertex_count { - uint8_t min; - uint8_t incr; -}; - -struct radv_tessellation_state { - uint32_t ls_hs_config; - uint32_t tcs_in_layout; - uint32_t tcs_out_layout; - uint32_t tcs_out_offsets; - uint32_t offchip_layout; - unsigned num_patches; - unsigned lds_size; - unsigned num_tcs_input_cp; - uint32_t tf_param; -}; - struct radv_pipeline { struct radv_device * device; uint32_t dynamic_state_mask; @@ -1009,7 +847,6 @@ struct radv_pipeline { bool needs_data_cache; struct radv_shader_variant * shaders[MESA_SHADER_STAGES]; - struct radv_shader_variant *gs_copy_shader; VkShaderStageFlags active_stages; uint32_t va_rsrc_word3[MAX_VERTEX_ATTRIBS]; @@ -1025,38 +862,13 @@ struct radv_pipeline { struct radv_depth_stencil_state ds; struct radv_raster_state raster; struct radv_multisample_state ms; - struct radv_tessellation_state tess; - uint32_t db_shader_control; - uint32_t shader_z_format; unsigned prim; unsigned gs_out; - uint32_t vgt_gs_mode; bool prim_restart_enable; - unsigned esgs_ring_size; - unsigned gsvs_ring_size; - uint32_t ps_input_cntl[32]; - uint32_t ps_input_cntl_num; - uint32_t pa_cl_vs_out_cntl; - uint32_t vgt_shader_stages_en; - struct radv_prim_vertex_count prim_vertex_count; - bool can_use_guardband; } graphics; }; - - unsigned max_waves; - unsigned scratch_bytes_per_wave; }; -static inline bool radv_pipeline_has_gs(struct radv_pipeline *pipeline) -{ - return pipeline->shaders[MESA_SHADER_GEOMETRY] ? true : false; -} - -static inline bool radv_pipeline_has_tess(struct radv_pipeline *pipeline) -{ - return pipeline->shaders[MESA_SHADER_TESS_EVAL] ? 
true : false; -} - struct radv_graphics_pipeline_create_info { bool use_rectlist; bool db_depth_clear; @@ -1121,6 +933,10 @@ struct radv_cmask_info { uint64_t offset; uint64_t size; unsigned alignment; + unsigned pitch; + unsigned height; + unsigned xalign; + unsigned yalign; unsigned slice_tile_max; unsigned base_address_reg; }; @@ -1147,24 +963,22 @@ struct radv_image { uint32_t samples; /**< VkImageCreateInfo::samples */ VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */ VkImageTiling tiling; /** VkImageCreateInfo::tiling */ - VkImageCreateFlags flags; /** VkImageCreateInfo::flags */ VkDeviceSize size; uint32_t alignment; - bool exclusive; - unsigned queue_family_mask; - /* Set when bound */ struct radeon_winsys_bo *bo; VkDeviceSize offset; uint32_t dcc_offset; - uint32_t htile_offset; struct radeon_surf surface; struct radv_fmask_info fmask; struct radv_cmask_info cmask; uint32_t clear_value_offset; + + /* Depth buffer compression and fast clear. */ + struct r600_htile_info htile; }; bool radv_layout_has_htile(const struct radv_image *image, @@ -1173,13 +987,8 @@ bool radv_layout_is_htile_compressed(const struct radv_image *image, VkImageLayout layout); bool radv_layout_can_expclear(const struct radv_image *image, VkImageLayout layout); -bool radv_layout_can_fast_clear(const struct radv_image *image, - VkImageLayout layout, - unsigned queue_mask); - - -unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family); - +bool radv_layout_has_cmask(const struct radv_image *image, + VkImageLayout layout); static inline uint32_t radv_get_layerCount(const struct radv_image *image, const VkImageSubresourceRange *range) @@ -1382,32 +1191,17 @@ struct radv_query_pool { uint32_t availability_offset; char *ptr; VkQueryType type; - uint32_t pipeline_stats_mask; }; -void -radv_update_descriptor_sets(struct radv_device *device, - struct radv_cmd_buffer *cmd_buffer, - VkDescriptorSet overrideSet, - uint32_t descriptorWriteCount, - const VkWriteDescriptorSet *pDescriptorWrites, - uint32_t descriptorCopyCount, - const VkCopyDescriptorSet *pDescriptorCopies); +VkResult +radv_temp_descriptor_set_create(struct radv_device *device, + struct radv_cmd_buffer *cmd_buffer, + VkDescriptorSetLayout _layout, + VkDescriptorSet *_set); void -radv_update_descriptor_set_with_template(struct radv_device *device, - struct radv_cmd_buffer *cmd_buffer, - struct radv_descriptor_set *set, - VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, - const void *pData); - -void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipelineLayout _layout, - uint32_t set, - uint32_t descriptorWriteCount, - const VkWriteDescriptorSet *pDescriptorWrites); - +radv_temp_descriptor_set_destroy(struct radv_device *device, + VkDescriptorSet _set); void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, uint32_t value); void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer, @@ -1419,8 +1213,6 @@ struct radv_fence { bool signalled; }; -struct radeon_winsys_sem; - #define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType) \ \ static inline struct __radv_type * \ @@ -1464,7 +1256,6 @@ RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, VkBufferView) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, VkDescriptorPool) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, VkDescriptorSet) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, VkDescriptorSetLayout) 
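/* The RADV_DEFINE_HANDLE_CASTS / RADV_DEFINE_NONDISP_HANDLE_CASTS body is
 * elided by the hunk context above, but the pattern it generates is what
 * makes RADV_FROM_HANDLE() in the .c files of this diff work: each Vulkan
 * handle type gets a pair of trivial cast helpers. A minimal sketch of the
 * conventional expansion for one of the types listed here (illustrative
 * only, not part of the patch):
 *
 *   static inline struct radv_sampler *
 *   radv_sampler_from_handle(VkSampler _handle)
 *   {
 *      return (struct radv_sampler *) _handle;
 *   }
 *
 *   static inline VkSampler
 *   radv_sampler_to_handle(struct radv_sampler *_obj)
 *   {
 *      return (VkSampler) _obj;
 *   }
 *
 * so RADV_FROM_HANDLE(radv_sampler, sampler, _sampler) can declare a local
 * `struct radv_sampler *sampler` recovered from the opaque VkSampler handle.
 */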
-RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, VkDescriptorUpdateTemplateKHR) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, VkDeviceMemory) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_fence, VkFence) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_event, VkEvent) @@ -1478,6 +1269,21 @@ RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, VkQueryPool) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, VkRenderPass) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, VkSampler) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_shader_module, VkShaderModule) -RADV_DEFINE_NONDISP_HANDLE_CASTS(radeon_winsys_sem, VkSemaphore) + +#define RADV_DEFINE_STRUCT_CASTS(__radv_type, __VkType) \ + \ + static inline const __VkType * \ + __radv_type ## _to_ ## __VkType(const struct __radv_type *__radv_obj) \ + { \ + return (const __VkType *) __radv_obj; \ + } + +#define RADV_COMMON_TO_STRUCT(__VkType, __vk_name, __common_name) \ + const __VkType *__vk_name = radv_common_to_ ## __VkType(__common_name) + +RADV_DEFINE_STRUCT_CASTS(radv_common, VkMemoryBarrier) +RADV_DEFINE_STRUCT_CASTS(radv_common, VkBufferMemoryBarrier) +RADV_DEFINE_STRUCT_CASTS(radv_common, VkImageMemoryBarrier) + #endif /* RADV_PRIVATE_H */ diff --git a/lib/mesa/src/amd/vulkan/radv_query.c b/lib/mesa/src/amd/vulkan/radv_query.c index d581ea534..cce38e853 100644 --- a/lib/mesa/src/amd/vulkan/radv_query.c +++ b/lib/mesa/src/amd/vulkan/radv_query.c @@ -29,20 +29,19 @@ #include <unistd.h> #include <fcntl.h> -#include "nir/nir_builder.h" -#include "radv_meta.h" #include "radv_private.h" #include "radv_cs.h" #include "sid.h" - -static const int pipelinestat_block_size = 11 * 8; -static const unsigned pipeline_statistics_indices[] = {7, 6, 3, 4, 5, 2, 1, 0, 8, 9, 10}; - static unsigned get_max_db(struct radv_device *device) { - unsigned num_db = device->physical_device->rad_info.num_render_backends; - MAYBE_UNUSED unsigned rb_mask = device->physical_device->rad_info.enabled_rb_mask; + unsigned num_db = device->instance->physicalDevice.rad_info.num_render_backends; + unsigned rb_mask = device->instance->physicalDevice.rad_info.enabled_rb_mask; + + if (device->instance->physicalDevice.rad_info.chip_class == SI) + num_db = 8; + else + num_db = MAX2(8, num_db); /* Otherwise we need to change the query reset procedure */ assert(rb_mask == ((1ull << num_db) - 1)); @@ -50,696 +49,6 @@ static unsigned get_max_db(struct radv_device *device) return num_db; } -static void radv_break_on_count(nir_builder *b, nir_variable *var, nir_ssa_def *count) -{ - nir_ssa_def *counter = nir_load_var(b, var); - - nir_if *if_stmt = nir_if_create(b->shader); - if_stmt->condition = nir_src_for_ssa(nir_uge(b, counter, count)); - nir_cf_node_insert(b->cursor, &if_stmt->cf_node); - - b->cursor = nir_after_cf_list(&if_stmt->then_list); - - nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_break); - nir_builder_instr_insert(b, &instr->instr); - - b->cursor = nir_after_cf_node(&if_stmt->cf_node); - counter = nir_iadd(b, counter, nir_imm_int(b, 1)); - nir_store_var(b, var, counter, 0x1); -} - -static struct nir_ssa_def * -radv_load_push_int(nir_builder *b, unsigned offset, const char *name) -{ - nir_intrinsic_instr *flags = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant); - flags->src[0] = nir_src_for_ssa(nir_imm_int(b, offset)); - flags->num_components = 1; - nir_ssa_dest_init(&flags->instr, &flags->dest, 1, 32, name); - nir_builder_instr_insert(b, &flags->instr); - return &flags->dest.ssa; -} - -static nir_shader * 
-build_occlusion_query_shader(struct radv_device *device) { - /* the shader this builds is roughly - * - * push constants { - * uint32_t flags; - * uint32_t dst_stride; - * }; - * - * uint32_t src_stride = 16 * db_count; - * - * location(binding = 0) buffer dst_buf; - * location(binding = 1) buffer src_buf; - * - * void main() { - * uint64_t result = 0; - * uint64_t src_offset = src_stride * global_id.x; - * uint64_t dst_offset = dst_stride * global_id.x; - * bool available = true; - * for (int i = 0; i < db_count; ++i) { - * uint64_t start = src_buf[src_offset + 16 * i]; - * uint64_t end = src_buf[src_offset + 16 * i + 8]; - * if ((start & (1ull << 63)) && (end & (1ull << 63))) - * result += end - start; - * else - * available = false; - * } - * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4; - * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) { - * if (flags & VK_QUERY_RESULT_64_BIT) - * dst_buf[dst_offset] = result; - * else - * dst_buf[dst_offset] = (uint32_t)result. - * } - * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - * dst_buf[dst_offset + elem_size] = available; - * } - * } - */ - nir_builder b; - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "occlusion_query"); - b.shader->info->cs.local_size[0] = 64; - b.shader->info->cs.local_size[1] = 1; - b.shader->info->cs.local_size[2] = 1; - - nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result"); - nir_variable *outer_counter = nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter"); - nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start"); - nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end"); - nir_variable *available = nir_local_variable_create(b.impl, glsl_int_type(), "available"); - unsigned db_count = get_max_db(device); - - nir_ssa_def *flags = radv_load_push_int(&b, 0, "flags"); - - nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader, - nir_intrinsic_vulkan_resource_index); - dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); - nir_intrinsic_set_desc_set(dst_buf, 0); - nir_intrinsic_set_binding(dst_buf, 0); - nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL); - nir_builder_instr_insert(&b, &dst_buf->instr); - - nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader, - nir_intrinsic_vulkan_resource_index); - src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); - nir_intrinsic_set_desc_set(src_buf, 0); - nir_intrinsic_set_binding(src_buf, 1); - nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL); - nir_builder_instr_insert(&b, &src_buf->instr); - - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); - nir_ssa_def *block_size = nir_imm_ivec4(&b, - b.shader->info->cs.local_size[0], - b.shader->info->cs.local_size[1], - b.shader->info->cs.local_size[2], 0); - nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); - global_id = nir_channel(&b, global_id, 0); // We only care about x here. 
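/* The NIR the builder statements above and below assemble is exactly the
 * accumulation described in the leading pseudocode comment, run once per
 * query by a single compute invocation. The same walk in plain C, assuming
 * db_count packed {begin,end} uint64 pairs per query with bit 63 serving as
 * the hardware's "result landed" flag (occlusion_sum_sketch is an
 * illustrative helper, not a function in this patch):
 */
static inline bool
occlusion_sum_sketch(const uint64_t *pairs, unsigned db_count,
                     uint64_t *sample_count)
{
	bool available = true;
	uint64_t sum = 0;

	for (unsigned i = 0; i < db_count; ++i) {
		uint64_t begin = pairs[2 * i + 0];
		uint64_t end = pairs[2 * i + 1];

		/* Both fences set: this render backend finished writing. */
		if ((begin & (1ull << 63)) && (end & (1ull << 63)))
			sum += end - begin;
		else
			available = false;
	}
	*sample_count = sum;
	return available;
}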
- - nir_ssa_def *input_stride = nir_imm_int(&b, db_count * 16); - nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id); - nir_ssa_def *output_stride = radv_load_push_int(&b, 4, "output_stride"); - nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id); - - - nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1); - nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1); - nir_store_var(&b, available, nir_imm_int(&b, 1), 0x1); - - nir_loop *outer_loop = nir_loop_create(b.shader); - nir_builder_cf_insert(&b, &outer_loop->cf_node); - b.cursor = nir_after_cf_list(&outer_loop->body); - - nir_ssa_def *current_outer_count = nir_load_var(&b, outer_counter); - radv_break_on_count(&b, outer_counter, nir_imm_int(&b, db_count)); - - nir_ssa_def *load_offset = nir_imul(&b, current_outer_count, nir_imm_int(&b, 16)); - load_offset = nir_iadd(&b, input_base, load_offset); - - nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo); - load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa); - load->src[1] = nir_src_for_ssa(load_offset); - nir_ssa_dest_init(&load->instr, &load->dest, 2, 64, NULL); - load->num_components = 2; - nir_builder_instr_insert(&b, &load->instr); - - const unsigned swizzle0[] = {0,0,0,0}; - const unsigned swizzle1[] = {1,1,1,1}; - nir_store_var(&b, start, nir_swizzle(&b, &load->dest.ssa, swizzle0, 1, false), 0x1); - nir_store_var(&b, end, nir_swizzle(&b, &load->dest.ssa, swizzle1, 1, false), 0x1); - - nir_ssa_def *start_done = nir_ilt(&b, nir_load_var(&b, start), nir_imm_int64(&b, 0)); - nir_ssa_def *end_done = nir_ilt(&b, nir_load_var(&b, end), nir_imm_int64(&b, 0)); - - nir_if *update_if = nir_if_create(b.shader); - update_if->condition = nir_src_for_ssa(nir_iand(&b, start_done, end_done)); - nir_cf_node_insert(b.cursor, &update_if->cf_node); - - b.cursor = nir_after_cf_list(&update_if->then_list); - - nir_store_var(&b, result, - nir_iadd(&b, nir_load_var(&b, result), - nir_isub(&b, nir_load_var(&b, end), - nir_load_var(&b, start))), 0x1); - - b.cursor = nir_after_cf_list(&update_if->else_list); - - nir_store_var(&b, available, nir_imm_int(&b, 0), 0x1); - - b.cursor = nir_after_cf_node(&outer_loop->cf_node); - - /* Store the result if complete or if partial results have been requested. 
*/ - - nir_ssa_def *result_is_64bit = nir_iand(&b, flags, - nir_imm_int(&b, VK_QUERY_RESULT_64_BIT)); - nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4)); - - nir_if *store_if = nir_if_create(b.shader); - store_if->condition = nir_src_for_ssa(nir_ior(&b, nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_PARTIAL_BIT)), nir_load_var(&b, available))); - nir_cf_node_insert(b.cursor, &store_if->cf_node); - - b.cursor = nir_after_cf_list(&store_if->then_list); - - nir_if *store_64bit_if = nir_if_create(b.shader); - store_64bit_if->condition = nir_src_for_ssa(result_is_64bit); - nir_cf_node_insert(b.cursor, &store_64bit_if->cf_node); - - b.cursor = nir_after_cf_list(&store_64bit_if->then_list); - - nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); - store->src[0] = nir_src_for_ssa(nir_load_var(&b, result)); - store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); - store->src[2] = nir_src_for_ssa(output_base); - nir_intrinsic_set_write_mask(store, 0x1); - store->num_components = 1; - nir_builder_instr_insert(&b, &store->instr); - - b.cursor = nir_after_cf_list(&store_64bit_if->else_list); - - store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); - store->src[0] = nir_src_for_ssa(nir_u2u32(&b, nir_load_var(&b, result))); - store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); - store->src[2] = nir_src_for_ssa(output_base); - nir_intrinsic_set_write_mask(store, 0x1); - store->num_components = 1; - nir_builder_instr_insert(&b, &store->instr); - - b.cursor = nir_after_cf_node(&store_if->cf_node); - - /* Store the availability bit if requested. */ - - nir_if *availability_if = nir_if_create(b.shader); - availability_if->condition = nir_src_for_ssa(nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT))); - nir_cf_node_insert(b.cursor, &availability_if->cf_node); - - b.cursor = nir_after_cf_list(&availability_if->then_list); - - store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); - store->src[0] = nir_src_for_ssa(nir_load_var(&b, available)); - store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); - store->src[2] = nir_src_for_ssa(nir_iadd(&b, result_size, output_base)); - nir_intrinsic_set_write_mask(store, 0x1); - store->num_components = 1; - nir_builder_instr_insert(&b, &store->instr); - - return b.shader; -} - -static nir_shader * -build_pipeline_statistics_query_shader(struct radv_device *device) { - /* the shader this builds is roughly - * - * push constants { - * uint32_t flags; - * uint32_t dst_stride; - * uint32_t stats_mask; - * uint32_t avail_offset; - * }; - * - * uint32_t src_stride = pipelinestat_block_size * 2; - * - * location(binding = 0) buffer dst_buf; - * location(binding = 1) buffer src_buf; - * - * void main() { - * uint64_t src_offset = src_stride * global_id.x; - * uint64_t dst_base = dst_stride * global_id.x; - * uint64_t dst_offset = dst_base; - * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 
8 : 4; - * uint32_t elem_count = stats_mask >> 16; - * uint32_t available = src_buf[avail_offset + 4 * global_id.x]; - * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - * dst_buf[dst_offset + elem_count * elem_size] = available; - * } - * if (available) { - * // repeat 11 times: - * if (stats_mask & (1 << 0)) { - * uint64_t start = src_buf[src_offset + 8 * indices[0]]; - * uint64_t end = src_buf[src_offset + 8 * indices[0] + pipelinestat_block_size]; - * uint64_t result = end - start; - * if (flags & VK_QUERY_RESULT_64_BIT) - * dst_buf[dst_offset] = result; - * else - * dst_buf[dst_offset] = (uint32_t)result. - * dst_offset += elem_size; - * } - * } else if (flags & VK_QUERY_RESULT_PARTIAL_BIT) { - * // Set everything to 0 as we don't know what is valid. - * for (int i = 0; i < elem_count; ++i) - * dst_buf[dst_base + elem_size * i] = 0; - * } - * } - */ - nir_builder b; - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "pipeline_statistics_query"); - b.shader->info->cs.local_size[0] = 64; - b.shader->info->cs.local_size[1] = 1; - b.shader->info->cs.local_size[2] = 1; - - nir_variable *output_offset = nir_local_variable_create(b.impl, glsl_int_type(), "output_offset"); - - nir_ssa_def *flags = radv_load_push_int(&b, 0, "flags"); - nir_ssa_def *stats_mask = radv_load_push_int(&b, 8, "stats_mask"); - nir_ssa_def *avail_offset = radv_load_push_int(&b, 12, "avail_offset"); - - nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader, - nir_intrinsic_vulkan_resource_index); - dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); - nir_intrinsic_set_desc_set(dst_buf, 0); - nir_intrinsic_set_binding(dst_buf, 0); - nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL); - nir_builder_instr_insert(&b, &dst_buf->instr); - - nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader, - nir_intrinsic_vulkan_resource_index); - src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); - nir_intrinsic_set_desc_set(src_buf, 0); - nir_intrinsic_set_binding(src_buf, 1); - nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL); - nir_builder_instr_insert(&b, &src_buf->instr); - - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); - nir_ssa_def *block_size = nir_imm_ivec4(&b, - b.shader->info->cs.local_size[0], - b.shader->info->cs.local_size[1], - b.shader->info->cs.local_size[2], 0); - nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); - global_id = nir_channel(&b, global_id, 0); // We only care about x here. 
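/* The stats_mask push constant loaded above is pre-encoded on the CPU side;
 * radv_query_shader() further down does `pipeline_stats_mask &= 0x7ff;
 * pipeline_stats_mask |= util_bitcount(pipeline_stats_mask) << 16;`. The
 * low 11 bits select which pipeline-statistics counters were requested and
 * the upper half carries their popcount, so the shader recovers the output
 * element count just below with a single shift. A sketch of the encoding
 * (encode_stats_mask_sketch is illustrative, not part of the patch):
 */
static inline uint32_t
encode_stats_mask_sketch(uint32_t requested)
{
	requested &= 0x7ff;	/* the 11 counters Vulkan defines */
	return requested | (util_bitcount(requested) << 16);
}
/* Shader side: elem_count = stats_mask >> 16;
 * counter i is emitted iff (stats_mask & (1u << i)). */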
- - nir_ssa_def *input_stride = nir_imm_int(&b, pipelinestat_block_size * 2); - nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id); - nir_ssa_def *output_stride = radv_load_push_int(&b, 4, "output_stride"); - nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id); - - - avail_offset = nir_iadd(&b, avail_offset, - nir_imul(&b, global_id, nir_imm_int(&b, 4))); - - nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo); - load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa); - load->src[1] = nir_src_for_ssa(avail_offset); - nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); - load->num_components = 1; - nir_builder_instr_insert(&b, &load->instr); - nir_ssa_def *available = &load->dest.ssa; - - nir_ssa_def *result_is_64bit = nir_iand(&b, flags, - nir_imm_int(&b, VK_QUERY_RESULT_64_BIT)); - nir_ssa_def *elem_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4)); - nir_ssa_def *elem_count = nir_ushr(&b, stats_mask, nir_imm_int(&b, 16)); - - /* Store the availability bit if requested. */ - - nir_if *availability_if = nir_if_create(b.shader); - availability_if->condition = nir_src_for_ssa(nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT))); - nir_cf_node_insert(b.cursor, &availability_if->cf_node); - - b.cursor = nir_after_cf_list(&availability_if->then_list); - - nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); - store->src[0] = nir_src_for_ssa(available); - store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); - store->src[2] = nir_src_for_ssa(nir_iadd(&b, output_base, nir_imul(&b, elem_count, elem_size))); - nir_intrinsic_set_write_mask(store, 0x1); - store->num_components = 1; - nir_builder_instr_insert(&b, &store->instr); - - b.cursor = nir_after_cf_node(&availability_if->cf_node); - - nir_if *available_if = nir_if_create(b.shader); - available_if->condition = nir_src_for_ssa(available); - nir_cf_node_insert(b.cursor, &available_if->cf_node); - - b.cursor = nir_after_cf_list(&available_if->then_list); - - nir_store_var(&b, output_offset, output_base, 0x1); - for (int i = 0; i < 11; ++i) { - nir_if *store_if = nir_if_create(b.shader); - store_if->condition = nir_src_for_ssa(nir_iand(&b, stats_mask, nir_imm_int(&b, 1u << i))); - nir_cf_node_insert(b.cursor, &store_if->cf_node); - - b.cursor = nir_after_cf_list(&store_if->then_list); - - load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo); - load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa); - load->src[1] = nir_src_for_ssa(nir_iadd(&b, input_base, - nir_imm_int(&b, pipeline_statistics_indices[i] * 8))); - nir_ssa_dest_init(&load->instr, &load->dest, 1, 64, NULL); - load->num_components = 1; - nir_builder_instr_insert(&b, &load->instr); - nir_ssa_def *start = &load->dest.ssa; - - load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo); - load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa); - load->src[1] = nir_src_for_ssa(nir_iadd(&b, input_base, - nir_imm_int(&b, pipeline_statistics_indices[i] * 8 + pipelinestat_block_size))); - nir_ssa_dest_init(&load->instr, &load->dest, 1, 64, NULL); - load->num_components = 1; - nir_builder_instr_insert(&b, &load->instr); - nir_ssa_def *end = &load->dest.ssa; - - nir_ssa_def *result = nir_isub(&b, end, start); - - /* Store result */ - nir_if *store_64bit_if = nir_if_create(b.shader); - store_64bit_if->condition = nir_src_for_ssa(result_is_64bit); - nir_cf_node_insert(b.cursor, &store_64bit_if->cf_node); - - b.cursor = 
nir_after_cf_list(&store_64bit_if->then_list); - - nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); - store->src[0] = nir_src_for_ssa(result); - store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); - store->src[2] = nir_src_for_ssa(nir_load_var(&b, output_offset)); - nir_intrinsic_set_write_mask(store, 0x1); - store->num_components = 1; - nir_builder_instr_insert(&b, &store->instr); - - b.cursor = nir_after_cf_list(&store_64bit_if->else_list); - - store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); - store->src[0] = nir_src_for_ssa(nir_u2u32(&b, result)); - store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); - store->src[2] = nir_src_for_ssa(nir_load_var(&b, output_offset)); - nir_intrinsic_set_write_mask(store, 0x1); - store->num_components = 1; - nir_builder_instr_insert(&b, &store->instr); - - b.cursor = nir_after_cf_node(&store_64bit_if->cf_node); - - nir_store_var(&b, output_offset, - nir_iadd(&b, nir_load_var(&b, output_offset), - elem_size), 0x1); - - b.cursor = nir_after_cf_node(&store_if->cf_node); - } - - b.cursor = nir_after_cf_list(&available_if->else_list); - - available_if = nir_if_create(b.shader); - available_if->condition = nir_src_for_ssa(nir_iand(&b, flags, - nir_imm_int(&b, VK_QUERY_RESULT_PARTIAL_BIT))); - nir_cf_node_insert(b.cursor, &available_if->cf_node); - - b.cursor = nir_after_cf_list(&available_if->then_list); - - /* Stores zeros in all outputs. */ - - nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter"); - nir_store_var(&b, counter, nir_imm_int(&b, 0), 0x1); - - nir_loop *loop = nir_loop_create(b.shader); - nir_builder_cf_insert(&b, &loop->cf_node); - b.cursor = nir_after_cf_list(&loop->body); - - nir_ssa_def *current_counter = nir_load_var(&b, counter); - radv_break_on_count(&b, counter, elem_count); - - nir_ssa_def *output_elem = nir_iadd(&b, output_base, - nir_imul(&b, elem_size, current_counter)); - - nir_if *store_64bit_if = nir_if_create(b.shader); - store_64bit_if->condition = nir_src_for_ssa(result_is_64bit); - nir_cf_node_insert(b.cursor, &store_64bit_if->cf_node); - - b.cursor = nir_after_cf_list(&store_64bit_if->then_list); - - store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); - store->src[0] = nir_src_for_ssa(nir_imm_int64(&b, 0)); - store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); - store->src[2] = nir_src_for_ssa(output_elem); - nir_intrinsic_set_write_mask(store, 0x1); - store->num_components = 1; - nir_builder_instr_insert(&b, &store->instr); - - b.cursor = nir_after_cf_list(&store_64bit_if->else_list); - - store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); - store->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); - store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); - store->src[2] = nir_src_for_ssa(output_elem); - nir_intrinsic_set_write_mask(store, 0x1); - store->num_components = 1; - nir_builder_instr_insert(&b, &store->instr); - - b.cursor = nir_after_cf_node(&loop->cf_node); - return b.shader; -} - -VkResult radv_device_init_meta_query_state(struct radv_device *device) -{ - VkResult result; - struct radv_shader_module occlusion_cs = { .nir = NULL }; - struct radv_shader_module pipeline_statistics_cs = { .nir = NULL }; - - zero(device->meta_state.query); - - occlusion_cs.nir = build_occlusion_query_shader(device); - pipeline_statistics_cs.nir = build_pipeline_statistics_query_shader(device); - - VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = { - .sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 2, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL - }, - } - }; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), - &occlusion_ds_create_info, - &device->meta_state.alloc, - &device->meta_state.query.ds_layout); - if (result != VK_SUCCESS) - goto fail; - - VkPipelineLayoutCreateInfo occlusion_pl_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.query.ds_layout, - .pushConstantRangeCount = 1, - .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, - }; - - result = radv_CreatePipelineLayout(radv_device_to_handle(device), - &occlusion_pl_create_info, - &device->meta_state.alloc, - &device->meta_state.query.p_layout); - if (result != VK_SUCCESS) - goto fail; - - VkPipelineShaderStageCreateInfo occlusion_pipeline_shader_stage = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = radv_shader_module_to_handle(&occlusion_cs), - .pName = "main", - .pSpecializationInfo = NULL, - }; - - VkComputePipelineCreateInfo occlusion_vk_pipeline_info = { - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .stage = occlusion_pipeline_shader_stage, - .flags = 0, - .layout = device->meta_state.query.p_layout, - }; - - result = radv_CreateComputePipelines(radv_device_to_handle(device), - radv_pipeline_cache_to_handle(&device->meta_state.cache), - 1, &occlusion_vk_pipeline_info, NULL, - &device->meta_state.query.occlusion_query_pipeline); - if (result != VK_SUCCESS) - goto fail; - - VkPipelineShaderStageCreateInfo pipeline_statistics_pipeline_shader_stage = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = radv_shader_module_to_handle(&pipeline_statistics_cs), - .pName = "main", - .pSpecializationInfo = NULL, - }; - - VkComputePipelineCreateInfo pipeline_statistics_vk_pipeline_info = { - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .stage = pipeline_statistics_pipeline_shader_stage, - .flags = 0, - .layout = device->meta_state.query.p_layout, - }; - - result = radv_CreateComputePipelines(radv_device_to_handle(device), - radv_pipeline_cache_to_handle(&device->meta_state.cache), - 1, &pipeline_statistics_vk_pipeline_info, NULL, - &device->meta_state.query.pipeline_statistics_query_pipeline); - if (result != VK_SUCCESS) - goto fail; - - return VK_SUCCESS; -fail: - radv_device_finish_meta_query_state(device); - ralloc_free(occlusion_cs.nir); - ralloc_free(pipeline_statistics_cs.nir); - return result; -} - -void radv_device_finish_meta_query_state(struct radv_device *device) -{ - if (device->meta_state.query.pipeline_statistics_query_pipeline) - radv_DestroyPipeline(radv_device_to_handle(device), - device->meta_state.query.pipeline_statistics_query_pipeline, - &device->meta_state.alloc); - - if (device->meta_state.query.occlusion_query_pipeline) - radv_DestroyPipeline(radv_device_to_handle(device), - 
device->meta_state.query.occlusion_query_pipeline, - &device->meta_state.alloc); - - if (device->meta_state.query.p_layout) - radv_DestroyPipelineLayout(radv_device_to_handle(device), - device->meta_state.query.p_layout, - &device->meta_state.alloc); - - if (device->meta_state.query.ds_layout) - radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), - device->meta_state.query.ds_layout, - &device->meta_state.alloc); -} - -static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, - VkPipeline pipeline, - struct radeon_winsys_bo *src_bo, - struct radeon_winsys_bo *dst_bo, - uint64_t src_offset, uint64_t dst_offset, - uint32_t src_stride, uint32_t dst_stride, - uint32_t count, uint32_t flags, - uint32_t pipeline_stats_mask, uint32_t avail_offset) -{ - struct radv_device *device = cmd_buffer->device; - struct radv_meta_saved_compute_state saved_state; - - radv_meta_save_compute(&saved_state, cmd_buffer, 4); - - struct radv_buffer dst_buffer = { - .bo = dst_bo, - .offset = dst_offset, - .size = dst_stride * count - }; - - struct radv_buffer src_buffer = { - .bo = src_bo, - .offset = src_offset, - .size = MAX2(src_stride * count, avail_offset + 4 * count - src_offset) - }; - - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - - radv_meta_push_descriptor_set(cmd_buffer, - VK_PIPELINE_BIND_POINT_COMPUTE, - device->meta_state.query.p_layout, - 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .pBufferInfo = &(VkDescriptorBufferInfo) { - .buffer = radv_buffer_to_handle(&dst_buffer), - .offset = 0, - .range = VK_WHOLE_SIZE - } - }, - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .pBufferInfo = &(VkDescriptorBufferInfo) { - .buffer = radv_buffer_to_handle(&src_buffer), - .offset = 0, - .range = VK_WHOLE_SIZE - } - } - }); - - /* Encode the number of elements for easy access by the shader. */ - pipeline_stats_mask &= 0x7ff; - pipeline_stats_mask |= util_bitcount(pipeline_stats_mask) << 16; - - avail_offset -= src_offset; - - struct { - uint32_t flags; - uint32_t dst_stride; - uint32_t pipeline_stats_mask; - uint32_t avail_offset; - } push_constants = { - flags, - dst_stride, - pipeline_stats_mask, - avail_offset - }; - - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.query.p_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), - &push_constants); - - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 | - RADV_CMD_FLAG_INV_VMEM_L1; - - if (flags & VK_QUERY_RESULT_WAIT_BIT) - cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER; - - radv_unaligned_dispatch(cmd_buffer, count, 1, 1); - - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_CS_PARTIAL_FLUSH; - - radv_meta_restore_compute(&saved_state, cmd_buffer, 4); -} - VkResult radv_CreateQueryPool( VkDevice _device, const VkQueryPoolCreateInfo* pCreateInfo, @@ -758,10 +67,12 @@ VkResult radv_CreateQueryPool( switch(pCreateInfo->queryType) { case VK_QUERY_TYPE_OCCLUSION: - pool->stride = 16 * get_max_db(device); + /* 16 bytes tmp. buffer as the compute packet writes 64 bits, but + * the app. may have 32 bits of space. 
*/ + pool->stride = 16 * get_max_db(device) + 16; break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: - pool->stride = pipelinestat_block_size * 2; + pool->stride = 16 * 11; break; case VK_QUERY_TYPE_TIMESTAMP: pool->stride = 8; @@ -771,12 +82,8 @@ VkResult radv_CreateQueryPool( } pool->type = pCreateInfo->queryType; - pool->pipeline_stats_mask = pCreateInfo->pipelineStatistics; pool->availability_offset = pool->stride * pCreateInfo->queryCount; - size = pool->availability_offset; - if (pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP || - pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS) - size += 4 * pCreateInfo->queryCount; + size = pool->availability_offset + 4 * pCreateInfo->queryCount; pool->bo = device->ws->buffer_create(device->ws, size, 64, RADEON_DOMAIN_GTT, 0); @@ -824,7 +131,6 @@ VkResult radv_GetQueryPoolResults( VkDeviceSize stride, VkQueryResultFlags flags) { - RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); char *data = pData; VkResult result = VK_SUCCESS; @@ -835,21 +141,23 @@ VkResult radv_GetQueryPoolResults( char *src = pool->ptr + query * pool->stride; uint32_t available; - if (pool->type != VK_QUERY_TYPE_OCCLUSION) { - if (flags & VK_QUERY_RESULT_WAIT_BIT) - while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query)) - ; - available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query); + if (flags & VK_QUERY_RESULT_WAIT_BIT) { + while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query)) + ; } - switch (pool->type) { - case VK_QUERY_TYPE_TIMESTAMP: { - if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) { - result = VK_NOT_READY; - break; + if (!*(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query) && + !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) { + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) + *(uint32_t*)dest = 0; + result = VK_NOT_READY; + continue; - } + } + available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query); + switch (pool->type) { + case VK_QUERY_TYPE_TIMESTAMP: if (flags & VK_QUERY_RESULT_64_BIT) { *(uint64_t*)dest = *(uint64_t*)src; dest += 8; @@ -858,79 +166,25 @@ VkResult radv_GetQueryPoolResults( dest += 4; } break; - } case VK_QUERY_TYPE_OCCLUSION: { - volatile uint64_t const *src64 = (volatile uint64_t const *)src; - uint64_t sample_count = 0; - int db_count = get_max_db(device); - available = 1; - - for (int i = 0; i < db_count; ++i) { - uint64_t start, end; - do { - start = src64[2 * i]; - end = src64[2 * i + 1]; - } while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) && (flags & VK_QUERY_RESULT_WAIT_BIT)); - - if (!(start & (1ull << 63)) || !(end & (1ull << 63))) - available = 0; - else { - sample_count += end - start; - } - } - - if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) { - result = VK_NOT_READY; - break; - - } + uint64_t result = *(uint64_t*)(src + pool->stride - 16); if (flags & VK_QUERY_RESULT_64_BIT) { - *(uint64_t*)dest = sample_count; + *(uint64_t*)dest = result; dest += 8; } else { - *(uint32_t*)dest = sample_count; + *(uint32_t*)dest = result; dest += 4; } break; - } - case VK_QUERY_TYPE_PIPELINE_STATISTICS: { - if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) { - result = VK_NOT_READY; - break; - - } - - const uint64_t *start = (uint64_t*)src; - const uint64_t *stop = (uint64_t*)(src + pipelinestat_block_size); - if (flags & VK_QUERY_RESULT_64_BIT) { - uint64_t *dst = (uint64_t*)dest; - dest += util_bitcount(pool->pipeline_stats_mask) * 8; - for(int i = 
0; i < 11; ++i) - if(pool->pipeline_stats_mask & (1u << i)) - *dst++ = stop[pipeline_statistics_indices[i]] - - start[pipeline_statistics_indices[i]]; - - } else { - uint32_t *dst = (uint32_t*)dest; - dest += util_bitcount(pool->pipeline_stats_mask) * 4; - for(int i = 0; i < 11; ++i) - if(pool->pipeline_stats_mask & (1u << i)) - *dst++ = stop[pipeline_statistics_indices[i]] - - start[pipeline_statistics_indices[i]]; - } - break; - } default: unreachable("trying to get results of unhandled query type"); } + } if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - if (flags & VK_QUERY_RESULT_64_BIT) { - *(uint64_t*)dest = available; - } else { - *(uint32_t*)dest = available; - } + *(uint32_t*)dest = available; + dest += 4; } } @@ -951,7 +205,6 @@ void radv_CmdCopyQueryPoolResults( RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer); struct radeon_winsys_cs *cs = cmd_buffer->cs; - unsigned elem_size = (flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4; uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo); uint64_t dest_va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo); dest_va += dst_buffer->offset + dstOffset; @@ -959,89 +212,33 @@ void radv_CmdCopyQueryPoolResults( cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, pool->bo, 8); cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8); - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - if (flags & VK_QUERY_RESULT_WAIT_BIT) { - for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) { - unsigned query = firstQuery + i; - uint64_t src_va = va + query * pool->stride + pool->stride - 4; - - /* Waits on the upper word of the last DB entry */ - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, 5 | WAIT_REG_MEM_MEM_SPACE(1)); - radeon_emit(cs, src_va); - radeon_emit(cs, src_va >> 32); - radeon_emit(cs, 0x80000000); /* reference value */ - radeon_emit(cs, 0xffffffff); /* mask */ - radeon_emit(cs, 4); /* poll interval */ - } - } - radv_query_shader(cmd_buffer, cmd_buffer->device->meta_state.query.occlusion_query_pipeline, - pool->bo, dst_buffer->bo, firstQuery * pool->stride, - dst_buffer->offset + dstOffset, - get_max_db(cmd_buffer->device) * 16, stride, - queryCount, flags, 0, 0); - break; - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - if (flags & VK_QUERY_RESULT_WAIT_BIT) { - for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) { - unsigned query = firstQuery + i; - - radeon_check_space(cmd_buffer->device->ws, cs, 7); + for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) { + unsigned query = firstQuery + i; + uint64_t local_src_va = va + query * pool->stride; + unsigned elem_size = (flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4; - uint64_t avail_va = va + pool->availability_offset + 4 * query; + unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 26); - /* This waits on the ME. All copies below are done on the ME */ - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); - radeon_emit(cs, avail_va); - radeon_emit(cs, avail_va >> 32); - radeon_emit(cs, 1); /* reference value */ - radeon_emit(cs, 0xffffffff); /* mask */ - radeon_emit(cs, 4); /* poll interval */ - } + if (flags & VK_QUERY_RESULT_WAIT_BIT) { + /* TODO, not sure if there is any case where we won't always be ready yet */ + uint64_t avail_va = va + pool->availability_offset + 4 * query; + + + /* This waits on the ME. 
All copies below are done on the ME */ + radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); + radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); + radeon_emit(cs, avail_va); + radeon_emit(cs, avail_va >> 32); + radeon_emit(cs, 1); /* reference value */ + radeon_emit(cs, 0xffffffff); /* mask */ + radeon_emit(cs, 4); /* poll interval */ } - radv_query_shader(cmd_buffer, cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, - pool->bo, dst_buffer->bo, firstQuery * pool->stride, - dst_buffer->offset + dstOffset, - pipelinestat_block_size * 2, stride, queryCount, flags, - pool->pipeline_stats_mask, - pool->availability_offset + 4 * firstQuery); - break; - case VK_QUERY_TYPE_TIMESTAMP: - for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) { - unsigned query = firstQuery + i; - uint64_t local_src_va = va + query * pool->stride; - - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 19); + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + local_src_va += pool->stride - 16; - if (flags & VK_QUERY_RESULT_WAIT_BIT) { - /* TODO, not sure if there is any case where we won't always be ready yet */ - uint64_t avail_va = va + pool->availability_offset + 4 * query; - - /* This waits on the ME. All copies below are done on the ME */ - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); - radeon_emit(cs, avail_va); - radeon_emit(cs, avail_va >> 32); - radeon_emit(cs, 1); /* reference value */ - radeon_emit(cs, 0xffffffff); /* mask */ - radeon_emit(cs, 4); /* poll interval */ - } - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - uint64_t avail_va = va + pool->availability_offset + 4 * query; - uint64_t avail_dest_va = dest_va + elem_size; - - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_MEM)); - radeon_emit(cs, avail_va); - radeon_emit(cs, avail_va >> 32); - radeon_emit(cs, avail_dest_va); - radeon_emit(cs, avail_dest_va >> 32); - } - + case VK_QUERY_TYPE_TIMESTAMP: radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | COPY_DATA_DST_SEL(COPY_DATA_MEM) | @@ -1050,13 +247,34 @@ void radv_CmdCopyQueryPoolResults( radeon_emit(cs, local_src_va >> 32); radeon_emit(cs, dest_va); radeon_emit(cs, dest_va >> 32); + break; + default: + unreachable("trying to get results of unhandled query type"); + } + /* The flag could be still changed while the data copy is busy and we + * then might have invalid data, but a ready flag. However, the availability + * writes happen on the ME too, so they should be synchronized. Might need to + * revisit this with multiple queues. 
+ */ + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + uint64_t avail_va = va + pool->availability_offset + 4 * query; + uint64_t avail_dest_va = dest_va; + if (pool->type != VK_QUERY_TYPE_PIPELINE_STATISTICS) + avail_dest_va += elem_size; + else + abort(); - assert(cs->cdw <= cdw_max); + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_MEM)); + radeon_emit(cs, avail_va); + radeon_emit(cs, avail_va >> 32); + radeon_emit(cs, avail_dest_va); + radeon_emit(cs, avail_dest_va >> 32); } - break; - default: - unreachable("trying to get results of unhandled query type"); + + assert(cs->cdw <= cdw_max); } } @@ -1075,10 +293,8 @@ void radv_CmdResetQueryPool( si_cp_dma_clear_buffer(cmd_buffer, va + firstQuery * pool->stride, queryCount * pool->stride, 0); - if (pool->type == VK_QUERY_TYPE_TIMESTAMP || - pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) - si_cp_dma_clear_buffer(cmd_buffer, va + pool->availability_offset + firstQuery * 4, - queryCount * 4, 0); + si_cp_dma_clear_buffer(cmd_buffer, va + pool->availability_offset + firstQuery * 4, + queryCount * 4, 0); } void radv_CmdBeginQuery( @@ -1108,14 +324,6 @@ void radv_CmdBeginQuery( radeon_emit(cs, va); radeon_emit(cs, va >> 32); break; - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - radeon_check_space(cmd_buffer->device->ws, cs, 4); - - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - break; default: unreachable("beginning unhandled query type"); } @@ -1149,28 +357,26 @@ void radv_CmdEndQuery( radeon_emit(cs, va + 8); radeon_emit(cs, (va + 8) >> 32); - break; - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - radeon_check_space(cmd_buffer->device->ws, cs, 10); - - va += pipelinestat_block_size; - - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); + radeon_emit(cs, PKT3(PKT3_OCCLUSION_QUERY, 3, 0)); radeon_emit(cs, va); radeon_emit(cs, va >> 32); + radeon_emit(cs, va + pool->stride - 16); + radeon_emit(cs, (va + pool->stride - 16) >> 32); - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) | - EVENT_INDEX(5)); - radeon_emit(cs, avail_va); - radeon_emit(cs, (avail_va >> 32) | EOP_DATA_SEL(1)); - radeon_emit(cs, 1); - radeon_emit(cs, 0); break; default: unreachable("ending unhandled query type"); } + + radeon_check_space(cmd_buffer->device->ws, cs, 5); + + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_ME)); + radeon_emit(cs, avail_va); + radeon_emit(cs, avail_va >> 32); + radeon_emit(cs, 1); } void radv_CmdWriteTimestamp( @@ -1181,7 +387,6 @@ void radv_CmdWriteTimestamp( { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); - bool mec = radv_cmd_buffer_uses_mec(cmd_buffer); struct radeon_winsys_cs *cs = cmd_buffer->cs; uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo); uint64_t avail_va = va + pool->availability_offset + 4 * query; @@ -1189,27 +394,17 @@ void radv_CmdWriteTimestamp( cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 5); - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12); - - if (mec) { - radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | 
EVENT_INDEX(5)); - radeon_emit(cs, 3 << 29); - radeon_emit(cs, query_va); - radeon_emit(cs, query_va >> 32); - radeon_emit(cs, 0); - radeon_emit(cs, 0); - } else { - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5)); - radeon_emit(cs, query_va); - radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF)); - radeon_emit(cs, 0); - radeon_emit(cs, 0); - } + unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 11); + + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); + radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5)); + radeon_emit(cs, query_va); + radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF)); + radeon_emit(cs, 0); + radeon_emit(cs, 0); radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(mec ? V_370_MEM_ASYNC : V_370_MEMORY_SYNC) | + radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(cs, avail_va); diff --git a/lib/mesa/src/amd/vulkan/radv_radeon_winsys.h b/lib/mesa/src/amd/vulkan/radv_radeon_winsys.h index f6bab7410..6370f3de7 100644 --- a/lib/mesa/src/amd/vulkan/radv_radeon_winsys.h +++ b/lib/mesa/src/amd/vulkan/radv_radeon_winsys.h @@ -47,7 +47,6 @@ enum radeon_bo_flag { /* bitfield */ RADEON_FLAG_GTT_WC = (1 << 0), RADEON_FLAG_CPU_ACCESS = (1 << 1), RADEON_FLAG_NO_CPU_ACCESS = (1 << 2), - RADEON_FLAG_VIRTUAL = (1 << 3) }; enum radeon_bo_usage { /* bitfield */ @@ -86,16 +85,14 @@ struct radeon_info { uint32_t gart_page_size; uint64_t gart_size; uint64_t vram_size; - uint64_t visible_vram_size; bool has_dedicated_vram; bool has_virtual_memory; bool gfx_ib_pad_with_type2; + bool has_sdma; bool has_uvd; - uint32_t sdma_rings; - uint32_t compute_rings; uint32_t vce_fw_version; uint32_t vce_harvest_config; - uint32_t clock_crystal_freq; /* in kHz */ + uint32_t clock_crystal_freq; /* Kernel info. 
*/ uint32_t drm_major; /* version */ @@ -149,7 +146,6 @@ struct radeon_info { #define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20) #define RADEON_SURF_FMASK (1 << 21) #define RADEON_SURF_DISABLE_DCC (1 << 22) -#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23) #define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK) #define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT) @@ -219,10 +215,6 @@ struct radeon_surf { uint64_t dcc_size; uint64_t dcc_alignment; - - uint64_t htile_size; - uint64_t htile_slice_size; - uint64_t htile_alignment; }; enum radeon_bo_layout { @@ -259,7 +251,6 @@ struct radeon_bo_metadata { struct radeon_winsys_bo; struct radeon_winsys_fence; -struct radeon_winsys_sem; struct radeon_winsys { void (*destroy)(struct radeon_winsys *ws); @@ -290,15 +281,10 @@ struct radeon_winsys { void (*buffer_set_metadata)(struct radeon_winsys_bo *bo, struct radeon_bo_metadata *md); - - void (*buffer_virtual_bind)(struct radeon_winsys_bo *parent, - uint64_t offset, uint64_t size, - struct radeon_winsys_bo *bo, uint64_t bo_offset); struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws); void (*ctx_destroy)(struct radeon_winsys_ctx *ctx); - bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx, - enum ring_type ring_type, int ring_index); + bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx); struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws, enum ring_type ring_type); @@ -312,15 +298,8 @@ struct radeon_winsys { void (*cs_grow)(struct radeon_winsys_cs * cs, size_t min_size); int (*cs_submit)(struct radeon_winsys_ctx *ctx, - int queue_index, struct radeon_winsys_cs **cs_array, unsigned cs_count, - struct radeon_winsys_cs *initial_preamble_cs, - struct radeon_winsys_cs *continue_preamble_cs, - struct radeon_winsys_sem **wait_sem, - unsigned wait_sem_count, - struct radeon_winsys_sem **signal_sem, - unsigned signal_sem_count, bool can_patch, struct radeon_winsys_fence *fence); @@ -331,8 +310,6 @@ struct radeon_winsys { void (*cs_execute_secondary)(struct radeon_winsys_cs *parent, struct radeon_winsys_cs *child); - void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id); - int (*surface_init)(struct radeon_winsys *ws, struct radeon_surf *surf); @@ -345,10 +322,6 @@ struct radeon_winsys { struct radeon_winsys_fence *fence, bool absolute, uint64_t timeout); - - struct radeon_winsys_sem *(*create_sem)(struct radeon_winsys *ws); - void (*destroy_sem)(struct radeon_winsys_sem *sem); - }; static inline void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value) diff --git a/lib/mesa/src/amd/vulkan/radv_util.c b/lib/mesa/src/amd/vulkan/radv_util.c index b892eb788..8c7a948bc 100644 --- a/lib/mesa/src/amd/vulkan/radv_util.c +++ b/lib/mesa/src/amd/vulkan/radv_util.c @@ -29,7 +29,6 @@ #include <assert.h> #include "radv_private.h" -#include "vk_enum_to_str.h" #include "util/u_math.h" @@ -66,13 +65,55 @@ void radv_printflike(3, 4) fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer); } +void radv_noreturn radv_printflike(1, 2) + radv_abortf(const char *format, ...) +{ + va_list va; + + va_start(va, format); + radv_abortfv(format, va); + va_end(va); +} + +void radv_noreturn +radv_abortfv(const char *format, va_list va) +{ + fprintf(stderr, "vk: error: "); + vfprintf(stderr, format, va); + fprintf(stderr, "\n"); + abort(); +} + VkResult __vk_errorf(VkResult error, const char *file, int line, const char *format, ...) 
{ va_list ap; char buffer[256]; - const char *error_str = vk_Result_to_str(error); +#define ERROR_CASE(error) case error: error_str = #error; break; + + const char *error_str; + switch ((int32_t)error) { + + /* Core errors */ + ERROR_CASE(VK_ERROR_OUT_OF_HOST_MEMORY) + ERROR_CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY) + ERROR_CASE(VK_ERROR_INITIALIZATION_FAILED) + ERROR_CASE(VK_ERROR_DEVICE_LOST) + ERROR_CASE(VK_ERROR_MEMORY_MAP_FAILED) + ERROR_CASE(VK_ERROR_LAYER_NOT_PRESENT) + ERROR_CASE(VK_ERROR_EXTENSION_NOT_PRESENT) + ERROR_CASE(VK_ERROR_INCOMPATIBLE_DRIVER) + + /* Extension errors */ + ERROR_CASE(VK_ERROR_OUT_OF_DATE_KHR) + + default: + assert(!"Unknown error"); + error_str = "unknown error"; + } + +#undef ERROR_CASE if (format) { va_start(ap, format); diff --git a/lib/mesa/src/amd/vulkan/radv_wsi.c b/lib/mesa/src/amd/vulkan/radv_wsi.c index 3a8617fd8..1f1ab1c80 100644 --- a/lib/mesa/src/amd/vulkan/radv_wsi.c +++ b/lib/mesa/src/amd/vulkan/radv_wsi.c @@ -24,9 +24,7 @@ */ #include "radv_private.h" -#include "radv_meta.h" #include "wsi_common.h" -#include "util/vk_util.h" static const struct wsi_callbacks wsi_cbs = { .get_phys_device_format_properties = radv_GetPhysicalDeviceFormatProperties, @@ -77,7 +75,7 @@ void radv_DestroySurfaceKHR( const VkAllocationCallbacks* pAllocator) { RADV_FROM_HANDLE(radv_instance, instance, _instance); - ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface); + RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); vk_free2(&instance->alloc, pAllocator, surface); } @@ -89,12 +87,12 @@ VkResult radv_GetPhysicalDeviceSurfaceSupportKHR( VkBool32* pSupported) { RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); - ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface); + RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); struct wsi_interface *iface = device->wsi_device.wsi[surface->platform]; return iface->get_support(surface, &device->wsi_device, &device->instance->alloc, - queueFamilyIndex, device->local_fd, true, pSupported); + queueFamilyIndex, pSupported); } VkResult radv_GetPhysicalDeviceSurfaceCapabilitiesKHR( @@ -103,7 +101,7 @@ VkResult radv_GetPhysicalDeviceSurfaceCapabilitiesKHR( VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) { RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); - ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface); + RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); struct wsi_interface *iface = device->wsi_device.wsi[surface->platform]; return iface->get_capabilities(surface, pSurfaceCapabilities); @@ -116,7 +114,7 @@ VkResult radv_GetPhysicalDeviceSurfaceFormatsKHR( VkSurfaceFormatKHR* pSurfaceFormats) { RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); - ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface); + RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); struct wsi_interface *iface = device->wsi_device.wsi[surface->platform]; return iface->get_formats(surface, &device->wsi_device, pSurfaceFormatCount, @@ -130,7 +128,7 @@ VkResult radv_GetPhysicalDeviceSurfacePresentModesKHR( VkPresentModeKHR* pPresentModes) { RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); - ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface); + RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); struct wsi_interface *iface = device->wsi_device.wsi[surface->platform]; return iface->get_present_modes(surface, pPresentModeCount, @@ -141,18 +139,18 @@ static VkResult radv_wsi_image_create(VkDevice device_h, const VkSwapchainCreateInfoKHR *pCreateInfo, const VkAllocationCallbacks* pAllocator, - 
bool needs_linear_copy, - bool linear, VkImage *image_p, VkDeviceMemory *memory_p, uint32_t *size, uint32_t *offset, uint32_t *row_pitch, int *fd_p) { + struct radv_device *device = radv_device_from_handle(device_h); VkResult result = VK_SUCCESS; struct radeon_surf *surface; VkImage image_h; struct radv_image *image; + bool bret; int fd; result = radv_image_create(device_h, @@ -171,7 +169,7 @@ radv_wsi_image_create(VkDevice device_h, .arrayLayers = 1, .samples = 1, /* FIXME: Need a way to use X tiling to allow scanout */ - .tiling = linear ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL, + .tiling = VK_IMAGE_TILING_OPTIMAL, .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, .flags = 0, }, @@ -184,44 +182,37 @@ radv_wsi_image_create(VkDevice device_h, image = radv_image_from_handle(image_h); VkDeviceMemory memory_h; - - const VkDedicatedAllocationMemoryAllocateInfoNV ded_alloc = { - .sType = VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV, - .pNext = NULL, - .buffer = VK_NULL_HANDLE, - .image = image_h - }; - + struct radv_device_memory *memory; result = radv_AllocateMemory(device_h, &(VkMemoryAllocateInfo) { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = &ded_alloc, - .allocationSize = image->size, - .memoryTypeIndex = linear ? 1 : 0, - }, + .allocationSize = image->size, + .memoryTypeIndex = 0, + }, NULL /* XXX: pAllocator */, &memory_h); if (result != VK_SUCCESS) goto fail_create_image; - radv_BindImageMemory(device_h, image_h, memory_h, 0); - - /* - * return the fd for the image in the no copy mode, - * or the fd for the linear image if a copy is required. - */ - if (!needs_linear_copy || (needs_linear_copy && linear)) { - RADV_FROM_HANDLE(radv_device, device, device_h); - RADV_FROM_HANDLE(radv_device_memory, memory, memory_h); - if (!radv_get_memory_fd(device, memory, &fd)) - goto fail_alloc_memory; - *fd_p = fd; - } + memory = radv_device_memory_from_handle(memory_h); + + radv_BindImageMemory(VK_NULL_HANDLE, image_h, memory_h, 0); + + bret = device->ws->buffer_get_fd(device->ws, + memory->bo, &fd); + if (bret == false) + goto fail_alloc_memory; + { + struct radeon_bo_metadata metadata; + radv_init_metadata(device, image, &metadata); + device->ws->buffer_set_metadata(memory->bo, &metadata); + } surface = &image->surface; *image_p = image_h; *memory_p = memory_h; + *fd_p = fd; *size = image->size; *offset = image->offset; *row_pitch = surface->level[0].pitch_bytes; @@ -251,94 +242,6 @@ static const struct wsi_image_fns radv_wsi_image_fns = { .free_wsi_image = radv_wsi_image_free, }; -#define NUM_PRIME_POOLS RADV_QUEUE_TRANSFER -static void -radv_wsi_free_prime_command_buffers(struct radv_device *device, - struct wsi_swapchain *swapchain) -{ - const int num_pools = NUM_PRIME_POOLS; - const int num_images = swapchain->image_count; - int i; - for (i = 0; i < num_pools; i++) { - radv_FreeCommandBuffers(radv_device_to_handle(device), - swapchain->cmd_pools[i], - swapchain->image_count, - &swapchain->cmd_buffers[i * num_images]); - - radv_DestroyCommandPool(radv_device_to_handle(device), - swapchain->cmd_pools[i], - &swapchain->alloc); - } -} - -static VkResult -radv_wsi_create_prime_command_buffers(struct radv_device *device, - const VkAllocationCallbacks *alloc, - struct wsi_swapchain *swapchain) -{ - const int num_pools = NUM_PRIME_POOLS; - const int num_images = swapchain->image_count; - int num_cmd_buffers = num_images * num_pools; //TODO bump to MAX_QUEUE_FAMILIES - VkResult result; - int i, j; - - swapchain->cmd_buffers = vk_alloc(alloc, 
(sizeof(VkCommandBuffer) * num_cmd_buffers), 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (!swapchain->cmd_buffers) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - memset(swapchain->cmd_buffers, 0, sizeof(VkCommandBuffer) * num_cmd_buffers); - memset(swapchain->cmd_pools, 0, sizeof(VkCommandPool) * num_pools); - for (i = 0; i < num_pools; i++) { - VkCommandPoolCreateInfo pool_create_info; - - pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - pool_create_info.pNext = NULL; - pool_create_info.flags = 0; - pool_create_info.queueFamilyIndex = i; - - result = radv_CreateCommandPool(radv_device_to_handle(device), - &pool_create_info, alloc, - &swapchain->cmd_pools[i]); - if (result != VK_SUCCESS) - goto fail; - - VkCommandBufferAllocateInfo cmd_buffer_info; - cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - cmd_buffer_info.pNext = NULL; - cmd_buffer_info.commandPool = swapchain->cmd_pools[i]; - cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - cmd_buffer_info.commandBufferCount = num_images; - - result = radv_AllocateCommandBuffers(radv_device_to_handle(device), - &cmd_buffer_info, - &swapchain->cmd_buffers[i * num_images]); - if (result != VK_SUCCESS) - goto fail; - for (j = 0; j < num_images; j++) { - VkImage image, linear_image; - int idx = (i * num_images) + j; - - swapchain->get_image_and_linear(swapchain, j, &image, &linear_image); - VkCommandBufferBeginInfo begin_info = {0}; - - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - - radv_BeginCommandBuffer(swapchain->cmd_buffers[idx], &begin_info); - - radv_blit_to_prime_linear(radv_cmd_buffer_from_handle(swapchain->cmd_buffers[idx]), - radv_image_from_handle(image), - radv_image_from_handle(linear_image)); - - radv_EndCommandBuffer(swapchain->cmd_buffers[idx]); - } - } - return VK_SUCCESS; -fail: - radv_wsi_free_prime_command_buffers(device, swapchain); - return result; -} - VkResult radv_CreateSwapchainKHR( VkDevice _device, const VkSwapchainCreateInfoKHR* pCreateInfo, @@ -346,9 +249,9 @@ VkResult radv_CreateSwapchainKHR( VkSwapchainKHR* pSwapchain) { RADV_FROM_HANDLE(radv_device, device, _device); - ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, pCreateInfo->surface); + RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface); struct wsi_interface *iface = - device->physical_device->wsi_device.wsi[surface->platform]; + device->instance->physicalDevice.wsi_device.wsi[surface->platform]; struct wsi_swapchain *swapchain; const VkAllocationCallbacks *alloc; if (pAllocator) @@ -356,8 +259,7 @@ VkResult radv_CreateSwapchainKHR( else alloc = &device->alloc; VkResult result = iface->create_swapchain(surface, _device, - &device->physical_device->wsi_device, - device->physical_device->local_fd, + &device->instance->physicalDevice.wsi_device, pCreateInfo, alloc, &radv_wsi_image_fns, &swapchain); @@ -372,13 +274,6 @@ VkResult radv_CreateSwapchainKHR( for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++) swapchain->fences[i] = VK_NULL_HANDLE; - if (swapchain->needs_linear_copy) { - result = radv_wsi_create_prime_command_buffers(device, alloc, - swapchain); - if (result != VK_SUCCESS) - return result; - } - *pSwapchain = wsi_swapchain_to_handle(swapchain); return VK_SUCCESS; @@ -406,9 +301,6 @@ void radv_DestroySwapchainKHR( radv_DestroyFence(_device, swapchain->fences[i], pAllocator); } - if (swapchain->needs_linear_copy) - radv_wsi_free_prime_command_buffers(device, swapchain); - swapchain->destroy(swapchain, alloc); } @@ -453,59 +345,30 @@ VkResult radv_QueuePresentKHR( 
RADV_FROM_HANDLE(radv_queue, queue, _queue); VkResult result = VK_SUCCESS; - const VkPresentRegionsKHR *regions = - vk_find_struct_const(pPresentInfo->pNext, PRESENT_REGIONS_KHR); - for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { RADV_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]); - struct radeon_winsys_cs *cs; - const VkPresentRegionKHR *region = NULL; - VkResult item_result; assert(radv_device_from_handle(swapchain->device) == queue->device); if (swapchain->fences[0] == VK_NULL_HANDLE) { - item_result = radv_CreateFence(radv_device_to_handle(queue->device), + result = radv_CreateFence(radv_device_to_handle(queue->device), &(VkFenceCreateInfo) { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .flags = 0, }, &swapchain->alloc, &swapchain->fences[0]); - if (pPresentInfo->pResults != NULL) - pPresentInfo->pResults[i] = item_result; - result = result == VK_SUCCESS ? item_result : result; - if (item_result != VK_SUCCESS) - continue; + if (result != VK_SUCCESS) + return result; } else { radv_ResetFences(radv_device_to_handle(queue->device), 1, &swapchain->fences[0]); } - if (swapchain->needs_linear_copy) { - int idx = (queue->queue_family_index * swapchain->image_count) + pPresentInfo->pImageIndices[i]; - cs = radv_cmd_buffer_from_handle(swapchain->cmd_buffers[idx])->cs; - } else - cs = queue->device->empty_cs[queue->queue_family_index]; - RADV_FROM_HANDLE(radv_fence, fence, swapchain->fences[0]); - struct radeon_winsys_fence *base_fence = fence->fence; - struct radeon_winsys_ctx *ctx = queue->hw_ctx; - queue->device->ws->cs_submit(ctx, queue->queue_idx, - &cs, - 1, NULL, NULL, - (struct radeon_winsys_sem **)pPresentInfo->pWaitSemaphores, - pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence); - fence->submitted = true; - - if (regions && regions->pRegions) - region = &regions->pRegions[i]; + radv_QueueSubmit(_queue, 0, NULL, swapchain->fences[0]); - item_result = swapchain->queue_present(swapchain, - pPresentInfo->pImageIndices[i], - region); + result = swapchain->queue_present(swapchain, + pPresentInfo->pImageIndices[i]); /* TODO: What if one of them returns OUT_OF_DATE? */ - if (pPresentInfo->pResults != NULL) - pPresentInfo->pResults[i] = item_result; - result = result == VK_SUCCESS ? item_result : result; - if (item_result != VK_SUCCESS) - continue; + if (result != VK_SUCCESS) + return result; VkFence last = swapchain->fences[2]; swapchain->fences[2] = swapchain->fences[1]; diff --git a/lib/mesa/src/amd/vulkan/radv_wsi_wayland.c b/lib/mesa/src/amd/vulkan/radv_wsi_wayland.c index d9a4c72d6..c6a9667d9 100644 --- a/lib/mesa/src/amd/vulkan/radv_wsi_wayland.c +++ b/lib/mesa/src/amd/vulkan/radv_wsi_wayland.c @@ -23,6 +23,9 @@ * IN THE SOFTWARE.
*/ +#include <wayland-client.h> +#include <wayland-drm-client-protocol.h> + #include "wsi_common_wayland.h" #include "radv_private.h" diff --git a/lib/mesa/src/amd/vulkan/radv_wsi_x11.c b/lib/mesa/src/amd/vulkan/radv_wsi_x11.c index c65ac9387..946b99095 100644 --- a/lib/mesa/src/amd/vulkan/radv_wsi_x11.c +++ b/lib/mesa/src/amd/vulkan/radv_wsi_x11.c @@ -45,9 +45,7 @@ VkBool32 radv_GetPhysicalDeviceXcbPresentationSupportKHR( return wsi_get_physical_device_xcb_presentation_support( &device->wsi_device, &device->instance->alloc, - queueFamilyIndex, - device->local_fd, true, - connection, visual_id); + queueFamilyIndex, connection, visual_id); } VkBool32 radv_GetPhysicalDeviceXlibPresentationSupportKHR( @@ -61,9 +59,7 @@ VkBool32 radv_GetPhysicalDeviceXlibPresentationSupportKHR( return wsi_get_physical_device_xcb_presentation_support( &device->wsi_device, &device->instance->alloc, - queueFamilyIndex, - device->local_fd, true, - XGetXCBConnection(dpy), visualID); + queueFamilyIndex, XGetXCBConnection(dpy), visualID); } VkResult radv_CreateXcbSurfaceKHR( diff --git a/lib/mesa/src/amd/vulkan/si_cmd_buffer.c b/lib/mesa/src/amd/vulkan/si_cmd_buffer.c index 8d7db9644..a61a950de 100644 --- a/lib/mesa/src/amd/vulkan/si_cmd_buffer.c +++ b/lib/mesa/src/amd/vulkan/si_cmd_buffer.c @@ -171,7 +171,7 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device, } static void -si_emit_compute(struct radv_physical_device *physical_device, +si_init_compute(struct radv_physical_device *physical_device, struct radeon_winsys_cs *cs) { radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3); @@ -209,22 +209,15 @@ si_emit_compute(struct radv_physical_device *physical_device, } } -void -si_init_compute(struct radv_cmd_buffer *cmd_buffer) -{ - struct radv_physical_device *physical_device = cmd_buffer->device->physical_device; - si_emit_compute(physical_device, cmd_buffer->cs); -} -static void -si_emit_config(struct radv_physical_device *physical_device, - struct radeon_winsys_cs *cs) +void si_init_config(struct radv_physical_device *physical_device, + struct radv_cmd_buffer *cmd_buffer) { unsigned num_rb = MIN2(physical_device->rad_info.num_render_backends, 16); unsigned rb_mask = physical_device->rad_info.enabled_rb_mask; unsigned raster_config, raster_config_1; int i; - + struct radeon_winsys_cs *cs = cmd_buffer->cs; radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); radeon_emit(cs, CONTEXT_CONTROL_LOAD_ENABLE(1)); radeon_emit(cs, CONTEXT_CONTROL_SHADOW_ENABLE(1)); @@ -297,7 +290,6 @@ si_emit_config(struct radv_physical_device *physical_device, raster_config_1 = 0x0000002a; break; case CHIP_POLARIS11: - case CHIP_POLARIS12: raster_config = 0x16000012; raster_config_1 = 0x00000000; break; @@ -362,6 +354,11 @@ si_emit_config(struct radv_physical_device *physical_device, radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0); + radeon_set_context_reg(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0)); + radeon_set_context_reg(cs, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0)); + radeon_set_context_reg(cs, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0)); + radeon_set_context_reg(cs, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0)); + radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0); @@ -374,15 +371,6 @@ si_emit_config(struct radv_physical_device *physical_device, radeon_set_context_reg(cs, 
R_028408_VGT_INDX_OFFSET, 0); if (physical_device->rad_info.chip_class >= CIK) { - /* If this is 0, Bonaire can hang even if GS isn't being used. - * Other chips are unaffected. These are suboptimal values, - * but we don't use on-chip GS. - */ - radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL, - S_028A44_ES_VERTS_PER_SUBGRP(64) | - S_028A44_GS_PRIMS_PER_SUBGRP(4)); - - radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff)); radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0); radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff)); radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff)); @@ -395,6 +383,7 @@ si_emit_config(struct radv_physical_device *physical_device, * * LATE_ALLOC_VS = 2 is the highest safe number. */ + radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff)); radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2)); } else { @@ -403,6 +392,7 @@ si_emit_config(struct radv_physical_device *physical_device, * - VS can't execute on CU0. * - If HS writes outputs to LDS, LS can't execute on CU0. */ + radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe)); radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe)); radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31)); } @@ -411,25 +401,16 @@ si_emit_config(struct radv_physical_device *physical_device, } if (physical_device->rad_info.chip_class >= VI) { - uint32_t vgt_tess_distribution; radeon_set_context_reg(cs, R_028424_CB_DCC_CONTROL, S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | S_028424_OVERWRITE_COMBINER_WATERMARK(4)); - if (physical_device->rad_info.family < CHIP_POLARIS10) - radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); + radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); - - vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | - S_028B50_ACCUM_TRI(11) | - S_028B50_ACCUM_QUAD(11) | - S_028B50_DONUT_SPLIT(16); - - if (physical_device->rad_info.family == CHIP_FIJI || - physical_device->rad_info.family >= CHIP_POLARIS10) - vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); - radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION, - vgt_tess_distribution); + S_028B50_ACCUM_ISOLINE(32) | + S_028B50_ACCUM_TRI(11) | + S_028B50_ACCUM_QUAD(11) | + S_028B50_DONUT_SPLIT(16)); } else { radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); @@ -438,51 +419,7 @@ si_emit_config(struct radv_physical_device *physical_device, if (physical_device->rad_info.family == CHIP_STONEY) radeon_set_context_reg(cs, R_028C40_PA_SC_SHADER_CONTROL, 0); - si_emit_compute(physical_device, cs); -} - -void si_init_config(struct radv_cmd_buffer *cmd_buffer) -{ - struct radv_physical_device *physical_device = cmd_buffer->device->physical_device; - - si_emit_config(physical_device, cmd_buffer->cs); -} - -void -cik_create_gfx_config(struct radv_device *device) -{ - struct radeon_winsys_cs *cs = device->ws->cs_create(device->ws, RING_GFX); - if (!cs) - return; - - si_emit_config(device->physical_device, cs); - - while (cs->cdw & 7) { - if (device->physical_device->rad_info.gfx_ib_pad_with_type2) - radeon_emit(cs, 0x80000000); - else - radeon_emit(cs, 0xffff1000); - } - - device->gfx_init = 
device->ws->buffer_create(device->ws, - cs->cdw * 4, 4096, - RADEON_DOMAIN_GTT, - RADEON_FLAG_CPU_ACCESS); - if (!device->gfx_init) - goto fail; - - void *map = device->ws->buffer_map(device->gfx_init); - if (!map) { - device->ws->buffer_destroy(device->gfx_init); - device->gfx_init = NULL; - goto fail; - } - memcpy(map, cs->buf, cs->cdw * 4); - - device->ws->buffer_unmap(device->gfx_init); - device->gfx_init_size_dw = cs->cdw; -fail: - device->ws->cs_destroy(cs); + si_init_compute(physical_device, cs); } static void @@ -511,7 +448,21 @@ si_write_viewport(struct radeon_winsys_cs *cs, int first_vp, { int i; - assert(count); + if (count == 0) { + radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6); + radeon_emit(cs, fui(1.0)); + radeon_emit(cs, fui(0.0)); + radeon_emit(cs, fui(1.0)); + radeon_emit(cs, fui(0.0)); + radeon_emit(cs, fui(1.0)); + radeon_emit(cs, fui(0.0)); + + radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2); + radeon_emit(cs, fui(0.0)); + radeon_emit(cs, fui(1.0)); + + return; + } radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE + first_vp * 4 * 6, count * 6); @@ -528,110 +479,39 @@ si_write_viewport(struct radeon_winsys_cs *cs, int first_vp, radeon_emit(cs, fui(translate[2])); } - radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + - first_vp * 4 * 2, count * 2); for (i = 0; i < count; i++) { float zmin = MIN2(viewports[i].minDepth, viewports[i].maxDepth); float zmax = MAX2(viewports[i].minDepth, viewports[i].maxDepth); + radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + + first_vp * 4 * 2, count * 2); radeon_emit(cs, fui(zmin)); radeon_emit(cs, fui(zmax)); } } -static VkRect2D si_scissor_from_viewport(const VkViewport *viewport) -{ - float scale[3], translate[3]; - VkRect2D rect; - - get_viewport_xform(viewport, scale, translate); - - rect.offset.x = translate[0] - abs(scale[0]); - rect.offset.y = translate[1] - abs(scale[1]); - rect.extent.width = ceilf(translate[0] + abs(scale[0])) - rect.offset.x; - rect.extent.height = ceilf(translate[1] + abs(scale[1])) - rect.offset.y; - - return rect; -} - -static VkRect2D si_intersect_scissor(const VkRect2D *a, const VkRect2D *b) { - VkRect2D ret; - ret.offset.x = MAX2(a->offset.x, b->offset.x); - ret.offset.y = MAX2(a->offset.y, b->offset.y); - ret.extent.width = MIN2(a->offset.x + a->extent.width, - b->offset.x + b->extent.width) - ret.offset.x; - ret.extent.height = MIN2(a->offset.y + a->extent.height, - b->offset.y + b->extent.height) - ret.offset.y; - return ret; -} - void si_write_scissors(struct radeon_winsys_cs *cs, int first, - int count, const VkRect2D *scissors, - const VkViewport *viewports, bool can_use_guardband) + int count, const VkRect2D *scissors) { int i; - float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY; - const float max_range = 32767.0f; - assert(count); + if (count == 0) + return; radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + first * 4 * 2, count * 2); for (i = 0; i < count; i++) { - VkRect2D viewport_scissor = si_scissor_from_viewport(viewports + i); - VkRect2D scissor = si_intersect_scissor(&scissors[i], &viewport_scissor); - - get_viewport_xform(viewports + i, scale, translate); - scale[0] = abs(scale[0]); - scale[1] = abs(scale[1]); - - if (scale[0] < 0.5) - scale[0] = 0.5; - if (scale[1] < 0.5) - scale[1] = 0.5; - - guardband_x = MIN2(guardband_x, (max_range - abs(translate[0])) / scale[0]); - guardband_y = MIN2(guardband_y, (max_range - abs(translate[1])) / scale[1]); - - radeon_emit(cs, 
S_028250_TL_X(scissor.offset.x) | - S_028250_TL_Y(scissor.offset.y) | + radeon_emit(cs, S_028250_TL_X(scissors[i].offset.x) | + S_028250_TL_Y(scissors[i].offset.y) | S_028250_WINDOW_OFFSET_DISABLE(1)); - radeon_emit(cs, S_028254_BR_X(scissor.offset.x + scissor.extent.width) | - S_028254_BR_Y(scissor.offset.y + scissor.extent.height)); + radeon_emit(cs, S_028254_BR_X(scissors[i].offset.x + scissors[i].extent.width) | + S_028254_BR_Y(scissors[i].offset.y + scissors[i].extent.height)); } - if (!can_use_guardband) { - guardband_x = 1.0; - guardband_y = 1.0; - } - - radeon_set_context_reg_seq(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4); - radeon_emit(cs, fui(guardband_y)); - radeon_emit(cs, fui(1.0)); - radeon_emit(cs, fui(guardband_x)); - radeon_emit(cs, fui(1.0)); -} - -static inline unsigned -radv_prims_for_vertices(struct radv_prim_vertex_count *info, unsigned num) -{ - if (num == 0) - return 0; - - if (info->incr == 0) - return 0; - - if (num < info->min) - return 0; - - return 1 + ((num - info->min) / info->incr); } uint32_t -si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, - bool instanced_draw, bool indirect_draw, - uint32_t draw_vertex_count) +si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer) { - enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class; - enum radeon_family family = cmd_buffer->device->physical_device->rad_info.family; - struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; + enum chip_class chip_class = cmd_buffer->device->instance->physicalDevice.rad_info.chip_class; + struct radeon_info *info = &cmd_buffer->device->instance->physicalDevice.rad_info; unsigned prim = cmd_buffer->state.pipeline->graphics.prim; unsigned primgroup_size = 128; /* recommended without a GS */ unsigned max_primgroup_in_wave = 2; @@ -641,45 +521,11 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool ia_switch_on_eoi = false; bool partial_vs_wave = false; bool partial_es_wave = false; - uint32_t num_prims = radv_prims_for_vertices(&cmd_buffer->state.pipeline->graphics.prim_vertex_count, draw_vertex_count); - bool multi_instances_smaller_than_primgroup; - - if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) - primgroup_size = cmd_buffer->state.pipeline->graphics.tess.num_patches; - else if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) - primgroup_size = 64; /* recommended with a GS */ - - multi_instances_smaller_than_primgroup = indirect_draw || (instanced_draw && - num_prims < primgroup_size); - if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) { - /* SWITCH_ON_EOI must be set if PrimID is used. */ - if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.uses_prim_id || - cmd_buffer->state.pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.uses_prim_id) - ia_switch_on_eoi = true; - /* Bug with tessellation and GS on Bonaire and older 2 SE chips. 
*/ - if ((family == CHIP_TAHITI || - family == CHIP_PITCAIRN || - family == CHIP_BONAIRE) && - radv_pipeline_has_gs(cmd_buffer->state.pipeline)) - partial_vs_wave = true; + /* TODO GS */ + + /* TODO TES */ - /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */ - if (cmd_buffer->device->has_distributed_tess) { - if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) { - partial_es_wave = true; - - if (family == CHIP_TONGA || - family == CHIP_FIJI || - family == CHIP_POLARIS10 || - family == CHIP_POLARIS11 || - family == CHIP_POLARIS12) - partial_vs_wave = true; - } else { - partial_vs_wave = true; - } - } - } /* TODO linestipple */ if (chip_class >= CIK) { @@ -690,47 +536,32 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, prim == V_008958_DI_PT_POLYGON || prim == V_008958_DI_PT_LINELOOP || prim == V_008958_DI_PT_TRIFAN || - prim == V_008958_DI_PT_TRISTRIP_ADJ || - (cmd_buffer->state.pipeline->graphics.prim_restart_enable && - (family < CHIP_POLARIS10 || - (prim != V_008958_DI_PT_POINTLIST && - prim != V_008958_DI_PT_LINESTRIP && - prim != V_008958_DI_PT_TRISTRIP)))) - wd_switch_on_eop = true; - - /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0. - * We don't know that for indirect drawing, so treat it as - * always problematic. */ - if (family == CHIP_HAWAII && - (instanced_draw || indirect_draw)) + prim == V_008958_DI_PT_TRISTRIP_ADJ) + // info->primitive_restart || + // info->count_from_stream_output) wd_switch_on_eop = true; - /* Performance recommendation for 4 SE Gfx7-8 parts if - * instances are smaller than a primgroup. - * Assume indirect draws always use small instances. - * This is needed for good VS wave utilization. - */ - if (chip_class <= VI && - info->max_se == 4 && - multi_instances_smaller_than_primgroup) - wd_switch_on_eop = true; + /* TODO HAWAII */ /* Required on CIK and later. */ if (info->max_se > 2 && !wd_switch_on_eop) ia_switch_on_eoi = true; /* Required by Hawaii and, for some special cases, by VI. */ +#if 0 if (ia_switch_on_eoi && - (family == CHIP_HAWAII || - (chip_class == VI && - (radv_pipeline_has_gs(cmd_buffer->state.pipeline) || max_primgroup_in_wave != 2)))) + (sctx->b.family == CHIP_HAWAII || + (sctx->b.chip_class == VI && + (sctx->gs_shader.cso || max_primgroup_in_wave != 2)))) partial_vs_wave = true; +#endif +#if 0 /* Instancing bug on Bonaire. */ - if (family == CHIP_BONAIRE && ia_switch_on_eoi && - (instanced_draw || indirect_draw)) + if (sctx->b.family == CHIP_BONAIRE && ia_switch_on_eoi && + (info->indirect || info->instance_count > 1)) partial_vs_wave = true; - +#endif /* If the WD switch is false, the IA switch must be false too. */ assert(wd_switch_on_eop || !ia_switch_on_eop); } @@ -738,19 +569,21 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, if (ia_switch_on_eoi) partial_es_wave = true; - if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) { - /* GS requirement. */ - if (SI_GS_PER_ES / primgroup_size >= cmd_buffer->device->gs_table_depth - 3) - partial_es_wave = true; - - /* Hw bug with single-primitive instances and SWITCH_ON_EOI - * on multi-SE chips. */ - if (info->max_se >= 2 && ia_switch_on_eoi && - ((instanced_draw || indirect_draw) && - num_prims <= 1)) - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH; - } - + /* GS requirement. */ +#if 0 + if (SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3) + partial_es_wave = true; +#endif + + /* Hw bug with single-primitive instances and SWITCH_ON_EOI + * on multi-SE chips. 
+ */
+#if 0
+	if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi &&
+	    (info->indirect ||
+	     (info->instance_count > 1 &&
+	      si_num_prims_for_vertices(info) <= 1)))
+		sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
+#endif

 	return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
 		S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
 		S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
@@ -762,44 +595,27 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
 }

-static void
-si_emit_acquire_mem(struct radeon_winsys_cs *cs,
-		    bool is_mec,
-		    unsigned cp_coher_cntl)
-{
-	if (is_mec) {
-		radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) |
-			    PKT3_SHADER_TYPE_S(1));
-		radeon_emit(cs, cp_coher_cntl);	/* CP_COHER_CNTL */
-		radeon_emit(cs, 0xffffffff);	/* CP_COHER_SIZE */
-		radeon_emit(cs, 0xff);		/* CP_COHER_SIZE_HI */
-		radeon_emit(cs, 0);		/* CP_COHER_BASE */
-		radeon_emit(cs, 0);		/* CP_COHER_BASE_HI */
-		radeon_emit(cs, 0x0000000A);	/* POLL_INTERVAL */
-	} else {
-		/* ACQUIRE_MEM is only required on a compute ring. */
-		radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
-		radeon_emit(cs, cp_coher_cntl);	/* CP_COHER_CNTL */
-		radeon_emit(cs, 0xffffffff);	/* CP_COHER_SIZE */
-		radeon_emit(cs, 0);		/* CP_COHER_BASE */
-		radeon_emit(cs, 0x0000000A);	/* POLL_INTERVAL */
-	}
-}
-
 void
-si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
-                       enum chip_class chip_class,
-                       bool is_mec,
-                       enum radv_cmd_flush_bits flush_bits)
+si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 {
+	enum chip_class chip_class = cmd_buffer->device->instance->physicalDevice.rad_info.chip_class;
 	unsigned cp_coher_cntl = 0;

-	if (flush_bits & RADV_CMD_FLAG_INV_ICACHE)
+	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128);
+
+	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_ICACHE)
 		cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
-	if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
+	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
 		cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
+	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_VMEM_L1)
+		cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
+	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) {
+		cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
+		if (chip_class >= VI)
+			cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
+	}

-	if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
+	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
 		cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
 			S_0085F0_CB0_DEST_BASE_ENA(1) |
 			S_0085F0_CB1_DEST_BASE_ENA(1) |
@@ -811,112 +627,74 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
 			S_0085F0_CB7_DEST_BASE_ENA(1);

 		/* Necessary for DCC */
-		if (chip_class >= VI) {
-			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-			radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) |
+		if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= VI) {
+			radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
+			radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) |
 					EVENT_INDEX(5));
-			radeon_emit(cs, 0);
-			radeon_emit(cs, 0);
-			radeon_emit(cs, 0);
-			radeon_emit(cs, 0);
+			radeon_emit(cmd_buffer->cs, 0);
+			radeon_emit(cmd_buffer->cs, 0);
+			radeon_emit(cmd_buffer->cs, 0);
+			radeon_emit(cmd_buffer->cs, 0);
 		}
 	}

-	if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
+	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
 		cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
 			S_0085F0_DB_DEST_BASE_ENA(1);
 	}

-	if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) {
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
+	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) {
+		radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+		radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
 	}

-	if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) {
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
+	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) {
+		radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+		radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
 	}

-	if (!(flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+	if (!(cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
 					      RADV_CMD_FLAG_FLUSH_AND_INV_DB))) {
-		if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
-			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-			radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-		} else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
-			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-			radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+		if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
+			radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+			radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+		} else if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
+			radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+			radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
 		}
 	}

-	if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-		radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
+		radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+		radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
 	}

 	/* VGT state sync */
-	if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-		radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
+	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
+		radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+		radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
 	}

 	/* Make sure ME is idle (it executes most packets) before continuing.
 	 * This prevents read-after-write hazards between PFP and ME.
 	 */
-	if ((cp_coher_cntl || (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) &&
-	    !is_mec) {
-		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
-		radeon_emit(cs, 0);
+	if (cp_coher_cntl || (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
+		radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+		radeon_emit(cmd_buffer->cs, 0);
 	}

-	if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
-	    (chip_class <= CIK && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
-		cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
-		if (chip_class >= VI)
-			cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
-	} else if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
-		cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1) |
-			S_0301F0_TC_NC_ACTION_ENA(1);
-
-		/* L2 writeback doesn't combine with L1 invalidate */
-		si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
-
-		cp_coher_cntl = 0;
-	}
-
-	if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1)
-		cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
-
 	/* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle.
 	 * Therefore, it should be last. Done in PFP.
 	 */
-	if (cp_coher_cntl)
-		si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
-}
-
-void
-si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
-{
-	bool is_compute = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
-
-	if (is_compute)
-		cmd_buffer->state.flush_bits &= ~(RADV_CMD_FLAG_FLUSH_AND_INV_CB |
-						  RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
-						  RADV_CMD_FLAG_FLUSH_AND_INV_DB |
-						  RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
-						  RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
-						  RADV_CMD_FLAG_VS_PARTIAL_FLUSH |
-						  RADV_CMD_FLAG_VGT_FLUSH);
-
-	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128);
-
-	si_cs_emit_cache_flush(cmd_buffer->cs,
-	                       cmd_buffer->device->physical_device->rad_info.chip_class,
-	                       radv_cmd_buffer_uses_mec(cmd_buffer),
-	                       cmd_buffer->state.flush_bits);
-
+	if (cp_coher_cntl) {
+		/* ACQUIRE_MEM is only required on a compute ring. */
+		radeon_emit(cmd_buffer->cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
+		radeon_emit(cmd_buffer->cs, cp_coher_cntl);	/* CP_COHER_CNTL */
+		radeon_emit(cmd_buffer->cs, 0xffffffff);	/* CP_COHER_SIZE */
+		radeon_emit(cmd_buffer->cs, 0);			/* CP_COHER_BASE */
+		radeon_emit(cmd_buffer->cs, 0x0000000A);	/* POLL_INTERVAL */
+	}

-	if (cmd_buffer->state.flush_bits)
-		radv_cmd_buffer_trace_emit(cmd_buffer);
 	cmd_buffer->state.flush_bits = 0;
 }
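The rewritten flush path above folds the removed si_emit_acquire_mem/si_cs_emit_cache_flush pair into one function: flush requests accumulate as bits, each bit contributes to a CP_COHER_CNTL mask, and a single SURFACE_SYNC packet is emitted last. A minimal sketch of that accumulate-then-emit shape, with hypothetical flag names and bit positions standing in for radv's generated register macros:

    #include <stdint.h>

    enum { FLUSH_ICACHE = 1u << 0, FLUSH_KCACHE = 1u << 1 }; /* stand-ins */

    extern void emit_dword(uint32_t dw); /* appends to the command stream */

    static void emit_cache_flush(uint32_t flush_bits)
    {
            uint32_t coher = 0;

            if (flush_bits & FLUSH_ICACHE)
                    coher |= 1u << 27; /* placeholder for SH_ICACHE_ACTION_ENA */
            if (flush_bits & FLUSH_KCACHE)
                    coher |= 1u << 28; /* placeholder for SH_KCACHE_ACTION_ENA */

            if (coher) {                    /* one sync packet, emitted last */
                    emit_dword(coher);      /* CP_COHER_CNTL */
                    emit_dword(0xffffffff); /* CP_COHER_SIZE: whole range */
                    emit_dword(0);          /* CP_COHER_BASE */
                    emit_dword(0x0000000A); /* POLL_INTERVAL */
            }
    }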
@@ -942,7 +720,7 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
 {
 	struct radeon_winsys_cs *cs = cmd_buffer->cs;
 	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
-	uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM_GFX6(1) : 0;
+	uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM(1) : 0;
 	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
 	uint32_t sel = flags & CIK_CP_DMA_USE_L2 ? S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) |
@@ -953,7 +731,7 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,

 	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);

-	if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+	if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
 		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
 		radeon_emit(cs, sync_flag | sel);	/* CP_SYNC [31] */
 		radeon_emit(cs, src_va);		/* SRC_ADDR_LO [31:0] */
@@ -975,12 +753,10 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
 	 * indices. If we wanted to execute CP DMA in PFP, this packet
 	 * should precede it.
 	 */
-	if (sync_flag && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
+	if (sync_flag) {
 		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 		radeon_emit(cs, 0);
 	}
-
-	radv_cmd_buffer_trace_emit(cmd_buffer);
 }

 /* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
@@ -990,7 +766,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
 {
 	struct radeon_winsys_cs *cs = cmd_buffer->cs;
 	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
-	uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM_GFX6(1) : 0;
+	uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM(1) : 0;
 	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
 	uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? S_411_DSL_SEL(V_411_DST_ADDR_TC_L2) : 0;

@@ -999,7 +775,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,

 	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);

-	if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+	if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
 		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
 		radeon_emit(cs, sync_flag | dst_sel | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */
 		radeon_emit(cs, clear_value);		/* DATA [31:0] */
@@ -1017,11 +793,10 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
 	}

 	/* See "copy_buffer" for explanation. */
-	if (sync_flag && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
+	if (sync_flag) {
 		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 		radeon_emit(cs, 0);
 	}
-	radv_cmd_buffer_trace_emit(cmd_buffer);
 }

 static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
@@ -1072,8 +847,8 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,

 	uint64_t skipped_size = 0, realign_size = 0;

-	if (cmd_buffer->device->physical_device->rad_info.family <= CHIP_CARRIZO ||
-	    cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY) {
+	if (cmd_buffer->device->instance->physicalDevice.rad_info.family <= CHIP_CARRIZO ||
+	    cmd_buffer->device->instance->physicalDevice.rad_info.family == CHIP_STONEY) {
 		/* If the size is not aligned, we must add a dummy copy at the end
 		 * just to align the internal counter. Otherwise, the DMA engine
 		 * would slow down by an order of magnitude for following copies.
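si_cp_dma_buffer_copy above keeps a workaround for Carrizo/Stoney-era parts: a misaligned CP DMA copy gets a small realignment copy appended so later copies stay fast. The split arithmetic in isolation (an illustrative helper, not the driver's exact logic):

    #include <stdint.h>

    /* Split `size` so the bulk copy stays `align`-multiple sized and a
     * short trailing copy realigns the DMA engine's internal counter. */
    static void cp_dma_split(uint64_t size, uint64_t align,
                             uint64_t *bulk, uint64_t *tail)
    {
            *tail = size % align; /* copied separately at the end */
            *bulk = size - *tail; /* aligned bulk copy */
    }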
diff --git a/lib/mesa/src/amd/vulkan/vk_format.h b/lib/mesa/src/amd/vulkan/vk_format.h
index 13ac17934..58ee3f71f 100644
--- a/lib/mesa/src/amd/vulkan/vk_format.h
+++ b/lib/mesa/src/amd/vulkan/vk_format.h
@@ -24,13 +24,15 @@
  * IN THE SOFTWARE.
  */

-#ifndef VK_FORMAT_H
-#define VK_FORMAT_H
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif

 #include <assert.h>
 #include <vulkan/vulkan.h>
 #include <util/macros.h>
-
 enum vk_format_layout {
 	/**
 	 * Formats with vk_format_block::width == vk_format_block::height == 1
@@ -444,5 +446,6 @@ vk_format_get_component_bits(VkFormat format,
 		return 0;
 	}
 }
-
-#endif /* VK_FORMAT_H */
+#ifdef __cplusplus
+} // extern "C" {
+#endif
diff --git a/lib/mesa/src/amd/vulkan/vk_format_parse.py b/lib/mesa/src/amd/vulkan/vk_format_parse.py
index 00cf1adf5..b743fc2bd 100755
--- a/lib/mesa/src/amd/vulkan/vk_format_parse.py
+++ b/lib/mesa/src/amd/vulkan/vk_format_parse.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 '''
 /**************************************************************************
diff --git a/lib/mesa/src/amd/vulkan/vk_format_table.py b/lib/mesa/src/amd/vulkan/vk_format_table.py
index 36352b108..06b98e568 100755
--- a/lib/mesa/src/amd/vulkan/vk_format_table.py
+++ b/lib/mesa/src/amd/vulkan/vk_format_table.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 CopyRight = '''
 /**************************************************************************
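The vk_format.h hunk swaps a classic include guard for #pragma once and wraps the declarations for C++ consumers. The same pattern on a stand-alone toy header (hypothetical names):

    /* my_api.h -- illustrative only */
    #pragma once

    #ifdef __cplusplus
    extern "C" {   /* C linkage when included from C++ */
    #endif

    int my_api_init(void);

    #ifdef __cplusplus
    }              /* end extern "C" */
    #endif

One practical difference worth remembering: #pragma once is enforced per file path by the compiler, while a guard macro also protects against duplicate copies of the same header.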
diff --git a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
index 7b679450c..7319a9888 100644
--- a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
+++ b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
@@ -34,210 +34,19 @@
 #include <amdgpu_drm.h>
 #include <inttypes.h>

-#include "util/u_atomic.h"
-
-
-static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);
-
-static void
-radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
-                               const struct radv_amdgpu_map_range *range)
-{
-	assert(range->size);
-
-	if (!range->bo)
-		return; /* TODO: PRT mapping */
-
-	p_atomic_inc(&range->bo->ref_count);
-	int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
-	                        range->offset + bo->va, 0, AMDGPU_VA_OP_MAP);
-	if (r)
-		abort();
-}
-
-static void
-radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
-                                 const struct radv_amdgpu_map_range *range)
-{
-	assert(range->size);
-
-	if (!range->bo)
-		return; /* TODO: PRT mapping */
-
-	int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
-	                        range->offset + bo->va, 0, AMDGPU_VA_OP_UNMAP);
-	if (r)
-		abort();
-	radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
-}
-
-static void
-radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
-{
-	bo->bo_count = 0;
-	for (uint32_t i = 0; i < bo->range_count; ++i) {
-		bool found = false;
-		if (!bo->ranges[i].bo)
-			continue;
-
-		for(uint32_t j = 0; j < bo->bo_count; ++j) {
-			if (bo->bos[j] == bo->ranges[i].bo) {
-				found = true;
-				break;
-			}
-		}
-
-		if (!found) {
-			if (bo->bo_capacity == bo->bo_count) {
-				bo->bos = realloc(bo->bos,
-				                  (bo->bo_capacity + 1) * sizeof(struct radv_amdgpu_bo *));
-				++bo->bo_capacity;
-			}
-			bo->bos[bo->bo_count++] = bo->ranges[i].bo;
-		}
-	}
-}
-
-static void
-radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
-                                   uint64_t offset, uint64_t size,
-                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
-{
-	struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
-	struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo*)_bo;
-	int range_count_delta, new_idx;
-	int first = 0, last;
-	struct radv_amdgpu_map_range new_first, new_last;
-
-	assert(parent->is_virtual);
-	assert(!bo || !bo->is_virtual);
-
-	if (!size)
-		return;
-
-	/* We have at most 2 new ranges (1 by the bind, and another one by splitting
-	 * a range that contains the newly bound range). */
-	if (parent->range_capacity - parent->range_count < 2) {
-		parent->range_capacity += 2;
-		parent->ranges = realloc(parent->ranges,
-		                         parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
-	}
-
-	/*
-	 * [first, last] is exactly the range of ranges that either overlap the
-	 * new parent, or are adjacent to it. This corresponds to the bind ranges
-	 * that may change.
-	 */
-	while(first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
-		++first;
-
-	last = first;
-	while(last + 1 < parent->range_count && parent->ranges[last].offset <= offset + size)
-		++last;
-
-	/* Whether the first or last range are going to be totally removed or just
-	 * resized/left alone. Note that in the case of first == last, we will split
-	 * this into a part before and after the new range. The remove flag is then
-	 * whether to not create the corresponding split part. */
-	bool remove_first = parent->ranges[first].offset == offset;
-	bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
-	bool unmapped_first = false;
-
-	assert(parent->ranges[first].offset <= offset);
-	assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
-
-	/* Try to merge the new range with the first range. */
-	if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
-		size += offset - parent->ranges[first].offset;
-		offset = parent->ranges[first].offset;
-		remove_first = true;
-	}
-
-	/* Try to merge the new range with the last range. */
-	if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
-		size = parent->ranges[last].offset + parent->ranges[last].size - offset;
-		remove_last = true;
-	}
-
-	range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
-	new_idx = first + !remove_first;
-
-	/* Any range between first and last is going to be entirely covered by the new range so just unmap them. */
-	for (int i = first + 1; i < last; ++i)
-		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);
-
-	/* If the first/last range are not left alone we unmap then and optionally map
-	 * them again after modifications. Not that this implicitly can do the splitting
-	 * if first == last. */
-	new_first = parent->ranges[first];
-	new_last = parent->ranges[last];
-
-	if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
-		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
-		unmapped_first = true;
-
-		if (!remove_first) {
-			new_first.size = offset - new_first.offset;
-			radv_amdgpu_winsys_virtual_map(parent, &new_first);
-		}
-	}
-
-	if (parent->ranges[last].offset < offset + size || remove_last) {
-		if (first != last || !unmapped_first)
-			radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);
-
-		if (!remove_last) {
-			new_last.size -= offset + size - new_last.offset;
-			new_last.offset = offset + size;
-			radv_amdgpu_winsys_virtual_map(parent, &new_last);
-		}
-	}
-
-	/* Moves the range list after last to account for the changed number of ranges. */
-	memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
-	        sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));
-
-	if (!remove_first)
-		parent->ranges[first] = new_first;
-
-	if (!remove_last)
-		parent->ranges[new_idx + 1] = new_last;
-
-	/* Actually set up the new range. */
-	parent->ranges[new_idx].offset = offset;
-	parent->ranges[new_idx].size = size;
-	parent->ranges[new_idx].bo = bo;
-	parent->ranges[new_idx].bo_offset = bo_offset;
-
-	radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);
-
-	parent->range_count += range_count_delta;
-
-	radv_amdgpu_winsys_rebuild_bo_list(parent);
-}
-
 static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
 {
 	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

-	if (p_atomic_dec_return(&bo->ref_count))
-		return;
-	if (bo->is_virtual) {
-		for (uint32_t i = 0; i < bo->range_count; ++i) {
-			radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
-		}
-		free(bo->bos);
-		free(bo->ranges);
-	} else {
-		if (bo->ws->debug_all_bos) {
-			pthread_mutex_lock(&bo->ws->global_bo_list_lock);
-			LIST_DEL(&bo->global_list_item);
-			bo->ws->num_buffers--;
-			pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
-		}
-		amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
-		amdgpu_bo_free(bo->bo);
+	if (bo->ws->debug_all_bos) {
+		pthread_mutex_lock(&bo->ws->global_bo_list_lock);
+		LIST_DEL(&bo->global_list_item);
+		bo->ws->num_buffers--;
+		pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
 	}
+	amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
 	amdgpu_va_range_free(bo->va_handle);
+	amdgpu_bo_free(bo->bo);
 	FREE(bo);
 }

@@ -272,32 +81,6 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
 		return NULL;
 	}

-	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
-	                          size, alignment, 0, &va, &va_handle, 0);
-	if (r)
-		goto error_va_alloc;
-
-	bo->va = va;
-	bo->va_handle = va_handle;
-	bo->size = size;
-	bo->ws = ws;
-	bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
-	bo->ref_count = 1;
-
-	if (flags & RADEON_FLAG_VIRTUAL) {
-		bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
-		bo->range_count = 1;
-		bo->range_capacity = 1;
-
-		bo->ranges[0].offset = 0;
-		bo->ranges[0].size = size;
-		bo->ranges[0].bo = NULL;
-		bo->ranges[0].bo_offset = 0;
-
-		radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
-		return (struct radeon_winsys_bo *)bo;
-	}
-
 	request.alloc_size = size;
 	request.phys_alignment = alignment;

@@ -322,22 +105,31 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
 		goto error_bo_alloc;
 	}

+	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
+	                          size, alignment, 0, &va, &va_handle, 0);
+	if (r)
+		goto error_va_alloc;
+
 	r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
 	if (r)
 		goto error_va_map;

 	bo->bo = buf_handle;
+	bo->va = va;
+	bo->va_handle = va_handle;
 	bo->initial_domain = initial_domain;
+	bo->size = size;
 	bo->is_shared = false;
+	bo->ws = ws;
 	radv_amdgpu_add_buffer_to_global_list(bo);
 	return (struct radeon_winsys_bo *)bo;
 error_va_map:
-	amdgpu_bo_free(buf_handle);
-
-error_bo_alloc:
 	amdgpu_va_range_free(va_handle);

 error_va_alloc:
+	amdgpu_bo_free(buf_handle);
+
+error_bo_alloc:
 	FREE(bo);
 	return NULL;
 }
@@ -413,8 +205,6 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
 	bo->initial_domain = initial;
 	bo->size = result.alloc_size;
 	bo->is_shared = true;
-	bo->ws = ws;
-	radv_amdgpu_add_buffer_to_global_list(bo);
 	return (struct radeon_winsys_bo *)bo;
 error_va_map:
 	amdgpu_va_range_free(va_handle);
@@ -504,5 +294,4 @@ void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
 	ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
 	ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
 	ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
-	ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
 }
diff --git a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
index 4512e76b3..499b063d5 100644
--- a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
+++ b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
@@ -31,41 +31,17 @@

 #include "radv_amdgpu_winsys.h"

-
-struct radv_amdgpu_map_range {
-	uint64_t offset;
-	uint64_t size;
-	struct radv_amdgpu_winsys_bo *bo;
-	uint64_t bo_offset;
-};
-
 struct radv_amdgpu_winsys_bo {
+	amdgpu_bo_handle bo;
 	amdgpu_va_handle va_handle;
+	uint64_t va;
+	enum radeon_bo_domain initial_domain;
 	uint64_t size;
-	struct radv_amdgpu_winsys *ws;
-	bool is_virtual;
-	int ref_count;
+	bool is_shared;

-	union {
-		/* physical bo */
-		struct {
-			amdgpu_bo_handle bo;
-			enum radeon_bo_domain initial_domain;
-			bool is_shared;
-			struct list_head global_list_item;
-		};
-		/* virtual bo */
-		struct {
-			struct radv_amdgpu_map_range *ranges;
-			uint32_t range_count;
-			uint32_t range_capacity;
-
-			struct radv_amdgpu_winsys_bo **bos;
-			uint32_t bo_count;
-			uint32_t bo_capacity;
-		};
-	};
+	struct radv_amdgpu_winsys *ws;
+	struct list_head global_list_item;
 };

 static inline
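The deleted virtual-BO support relied on reference counting: every mapped range took a reference on its backing BO, and destroy only freed storage once the count hit zero. The lifetime rule in miniature (C11 atomics; the names are hypothetical, not radv's):

    #include <stdatomic.h>
    #include <stdlib.h>

    struct buf {
            atomic_int ref_count; /* 1 at creation */
    };

    static void buf_unref(struct buf *b)
    {
            /* the last reference out frees the object */
            if (atomic_fetch_sub(&b->ref_count, 1) == 1)
                    free(b);
    }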
diff --git a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index ca7d647fd..b8558fafc 100644
--- a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -27,18 +27,12 @@
 #include <amdgpu_drm.h>
 #include <assert.h>

-#include "ac_debug.h"
 #include "amdgpu_id.h"
 #include "radv_radeon_winsys.h"
 #include "radv_amdgpu_cs.h"
 #include "radv_amdgpu_bo.h"
 #include "sid.h"

-
-enum {
-	VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024
-};
-
 struct radv_amdgpu_cs {
 	struct radeon_winsys_cs base;
 	struct radv_amdgpu_winsys *ws;
@@ -60,13 +54,6 @@ struct radv_amdgpu_cs {

 	bool is_chained;
 	int buffer_hash_table[1024];
-	unsigned hw_ip;
-
-	unsigned num_virtual_buffers;
-	unsigned max_num_virtual_buffers;
-	struct radeon_winsys_bo **virtual_buffers;
-	uint8_t *virtual_buffer_priorities;
-	int *virtual_buffer_hash_table;
 };

 static inline struct radv_amdgpu_cs *
@@ -75,30 +62,6 @@ radv_amdgpu_cs(struct radeon_winsys_cs *base)
 	return (struct radv_amdgpu_cs*)base;
 }

-static int ring_to_hw_ip(enum ring_type ring)
-{
-	switch (ring) {
-	case RING_GFX:
-		return AMDGPU_HW_IP_GFX;
-	case RING_DMA:
-		return AMDGPU_HW_IP_DMA;
-	case RING_COMPUTE:
-		return AMDGPU_HW_IP_COMPUTE;
-	default:
-		unreachable("unsupported ring");
-	}
-}
-
-static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
-					 struct amdgpu_cs_fence *fence,
-					 struct amdgpu_cs_request *req)
-{
-	fence->context = ctx->ctx;
-	fence->ip_type = req->ip_type;
-	fence->ip_instance = req->ip_instance;
-	fence->ring = req->ring;
-	fence->fence = req->seq_no;
-}

 static struct radeon_winsys_fence *radv_amdgpu_create_fence()
 {
@@ -152,9 +115,6 @@ static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
 			cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

 		free(cs->old_ib_buffers);
-		free(cs->virtual_buffers);
-		free(cs->virtual_buffer_priorities);
-		free(cs->virtual_buffer_hash_table);
 		free(cs->handles);
 		free(cs->priorities);
 		free(cs);
@@ -166,7 +126,6 @@ static boolean radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
 	for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
 		cs->buffer_hash_table[i] = -1;

-	cs->hw_ip = ring_to_hw_ip(ring_type);
 	return true;
 }

@@ -181,7 +140,7 @@ radv_amdgpu_cs_create(struct radeon_winsys *ws,
 		return NULL;

 	cs->ws = radv_amdgpu_winsys(ws);
-	radv_amdgpu_init_cs(cs, ring_type);
+	radv_amdgpu_init_cs(cs, RING_GFX);

 	if (cs->ws->use_ib_bos) {
 		cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
@@ -329,13 +288,7 @@ static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
 		cs->buffer_hash_table[hash] = -1;
 	}

-	for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
-		unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
-		cs->virtual_buffer_hash_table[hash] = -1;
-	}
-
 	cs->num_buffers = 0;
-	cs->num_virtual_buffers = 0;

 	if (cs->ws->use_ib_bos) {
 		cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);
@@ -400,49 +353,6 @@ static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
 	++cs->num_buffers;
 }

-static void radv_amdgpu_cs_add_virtual_buffer(struct radeon_winsys_cs *_cs,
-					      struct radeon_winsys_bo *bo,
-					      uint8_t priority)
-{
-	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
-	unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
-
-
-	if (!cs->virtual_buffer_hash_table) {
-		cs->virtual_buffer_hash_table = malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
-		for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
-			cs->virtual_buffer_hash_table[i] = -1;
-	}
-
-	if (cs->virtual_buffer_hash_table[hash] >= 0) {
-		int idx = cs->virtual_buffer_hash_table[hash];
-		if (cs->virtual_buffers[idx] == bo) {
-			cs->virtual_buffer_priorities[idx] = MAX2(cs->virtual_buffer_priorities[idx], priority);
-			return;
-		}
-		for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
-			if (cs->virtual_buffers[i] == bo) {
-				cs->virtual_buffer_priorities[i] = MAX2(cs->virtual_buffer_priorities[i], priority);
-				cs->virtual_buffer_hash_table[hash] = i;
-				return;
-			}
-		}
-	}
-
-	if(cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
-		cs->max_num_virtual_buffers = MAX2(2, cs->max_num_virtual_buffers * 2);
-		cs->virtual_buffers = realloc(cs->virtual_buffers, sizeof(struct radv_amdgpu_virtual_virtual_buffer*) * cs->max_num_virtual_buffers);
-		cs->virtual_buffer_priorities = realloc(cs->virtual_buffer_priorities, sizeof(uint8_t) * cs->max_num_virtual_buffers);
-	}
-
-	cs->virtual_buffers[cs->num_virtual_buffers] = bo;
-	cs->virtual_buffer_priorities[cs->num_virtual_buffers] = priority;
-
-	cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
-	++cs->num_virtual_buffers;
-
-}
-
 static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
 				      struct radeon_winsys_bo *_bo,
 				      uint8_t priority)
@@ -450,11 +360,6 @@ static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
 	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

-	if (bo->is_virtual) {
-		radv_amdgpu_cs_add_virtual_buffer(_cs, _bo, priority);
-		return;
-	}
-
 	radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
 }

@@ -469,11 +374,6 @@ static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
 					      child->priorities[i]);
 	}

-	for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
-		radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i],
-					  child->virtual_buffer_priorities[i]);
-	}
-
 	if (parent->ws->use_ib_bos) {
 		if (parent->base.cdw + 4 > parent->base.max_dw)
 			radv_amdgpu_cs_grow(&parent->base, 4);
@@ -495,7 +395,6 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 				      struct radeon_winsys_cs **cs_array,
 				      unsigned count,
 				      struct radv_amdgpu_winsys_bo *extra_bo,
-				      struct radeon_winsys_cs *extra_cs,
 				      amdgpu_bo_list_handle *bo_list)
 {
 	int r;
@@ -522,8 +421,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 				 bo_list);
 		free(handles);
 		pthread_mutex_unlock(&ws->global_bo_list_lock);
-	} else if (count == 1 && !extra_bo && !extra_cs &&
-	           !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
+	} else if (count == 1 && !extra_bo) {
 		struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
 		r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
 					  cs->priorities, bo_list);
@@ -533,12 +431,6 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 		for (unsigned i = 0; i < count; ++i) {
 			struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
 			total_buffer_count += cs->num_buffers;
-			for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
-				total_buffer_count += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
-		}
-
-		if (extra_cs) {
-			total_buffer_count += ((struct radv_amdgpu_cs*)extra_cs)->num_buffers;
 		}

 		amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count);
@@ -554,27 +446,11 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 			priorities[0] = 8;
 		}

-		for (unsigned i = 0; i < count + !!extra_cs; ++i) {
-			struct radv_amdgpu_cs *cs;
-
-			if (i == count)
-				cs = (struct radv_amdgpu_cs*)extra_cs;
-			else
-				cs = (struct radv_amdgpu_cs*)cs_array[i];
-
-			if (!cs->num_buffers)
-				continue;
-
-			if (unique_bo_count == 0) {
-				memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle));
-				memcpy(priorities, cs->priorities, cs->num_buffers * sizeof(uint8_t));
-				unique_bo_count = cs->num_buffers;
-				continue;
-			}
-			int unique_bo_so_far = unique_bo_count;
+		for (unsigned i = 0; i < count; ++i) {
+			struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
 			for (unsigned j = 0; j < cs->num_buffers; ++j) {
 				bool found = false;
-				for (unsigned k = 0; k < unique_bo_so_far; ++k) {
+				for (unsigned k = 0; k < unique_bo_count; ++k) {
 					if (handles[k] == cs->handles[j]) {
 						found = true;
 						priorities[k] = MAX2(priorities[k],
@@ -588,26 +464,6 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 						++unique_bo_count;
 				}
 			}
-			for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
-				struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
-				for(unsigned k = 0; k < virtual_bo->bo_count; ++k) {
-					struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
-					bool found = false;
-					for (unsigned m = 0; m < unique_bo_count; ++m) {
-						if (handles[m] == bo->bo) {
-							found = true;
-							priorities[m] = MAX2(priorities[m],
-							                     cs->virtual_buffer_priorities[j]);
-							break;
-						}
-					}
-					if (!found) {
-						handles[unique_bo_count] = bo->bo;
-						priorities[unique_bo_count] = cs->virtual_buffer_priorities[j];
-						++unique_bo_count;
-					}
-				}
-			}
 		}

 		r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
					  priorities, bo_list);
@@ -619,20 +475,9 @@
 	return r;
 }

-static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
-				    struct amdgpu_cs_request *request)
-{
-	radv_amdgpu_request_to_fence(ctx,
-	                             &ctx->last_submission[request->ip_type][request->ring],
-	                             request);
-}
-
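radv_amdgpu_create_bo_list's surviving merge loop is a linear-scan dedup: each handle is searched in the unique list, and a duplicate keeps the higher of the two priorities. Restated as a self-contained helper (illustrative; the real code operates on amdgpu_bo_handle arrays):

    #include <stdint.h>

    /* Returns the new count; on a duplicate keeps the max priority. */
    static unsigned merge_handle(uint32_t *handles, uint8_t *prio,
                                 unsigned count, uint32_t h, uint8_t p)
    {
            for (unsigned k = 0; k < count; ++k) {
                    if (handles[k] == h) {
                            if (prio[k] < p)
                                    prio[k] = p; /* MAX2 */
                            return count;
                    }
            }
            handles[count] = h;
            prio[count] = p;
            return count + 1;
    }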
 static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
-						int queue_idx,
 						struct radeon_winsys_cs **cs_array,
 						unsigned cs_count,
-						struct radeon_winsys_cs *initial_preamble_cs,
-						struct radeon_winsys_cs *continue_preamble_cs,
 						struct radeon_winsys_fence *_fence)
 {
 	int r;
@@ -641,7 +486,6 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
 	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
 	amdgpu_bo_list_handle bo_list;
 	struct amdgpu_cs_request request = {0};
-	struct amdgpu_cs_ib_info ibs[2];

 	for (unsigned i = cs_count; i--;) {
 		struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
@@ -665,25 +509,17 @@
 		}
 	}

-	r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, initial_preamble_cs, &bo_list);
+	r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, &bo_list);
 	if (r) {
 		fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n");
 		return r;
 	}

-	request.ip_type = cs0->hw_ip;
-	request.ring = queue_idx;
+	request.ip_type = AMDGPU_HW_IP_GFX;
 	request.number_of_ibs = 1;
 	request.ibs = &cs0->ib;
 	request.resources = bo_list;

-	if (initial_preamble_cs) {
-		request.ibs = ibs;
-		request.number_of_ibs = 2;
-		ibs[1] = cs0->ib;
-		ibs[0] = ((struct radv_amdgpu_cs*)initial_preamble_cs)->ib;
-	}
-
 	r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
 	if (r) {
 		if (r == -ENOMEM)
@@ -695,20 +531,21 @@

 	amdgpu_bo_list_destroy(bo_list);

-	if (fence)
-		radv_amdgpu_request_to_fence(ctx, fence, &request);
-
-	radv_assign_last_submit(ctx, &request);
+	if (fence) {
+		fence->context = ctx->ctx;
+		fence->ip_type = request.ip_type;
+		fence->ip_instance = request.ip_instance;
+		fence->ring = request.ring;
+		fence->fence = request.seq_no;
+	}
+	ctx->last_seq_no = request.seq_no;

 	return r;
 }

 static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
-						 int queue_idx,
 						 struct radeon_winsys_cs **cs_array,
 						 unsigned cs_count,
-						 struct radeon_winsys_cs *initial_preamble_cs,
-						 struct radeon_winsys_cs *continue_preamble_cs,
 						 struct radeon_winsys_fence *_fence)
 {
 	int r;
@@ -722,32 +559,24 @@
 	for (unsigned i = 0; i < cs_count;) {
 		struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
 		struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
-		struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
-		unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs,
-				    cs_count - i);
+		unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT, cs_count - i);

 		memset(&request, 0, sizeof(request));

-		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL,
-					       preamble_cs, &bo_list);
+		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, &bo_list);
 		if (r) {
 			fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n");
 			return r;
 		}

-		request.ip_type = cs0->hw_ip;
-		request.ring = queue_idx;
+		request.ip_type = AMDGPU_HW_IP_GFX;
 		request.resources = bo_list;
-		request.number_of_ibs = cnt + !!preamble_cs;
+		request.number_of_ibs = cnt;
 		request.ibs = ibs;

-		if (preamble_cs) {
-			ibs[0] = radv_amdgpu_cs(preamble_cs)->ib;
-		}
-
 		for (unsigned j = 0; j < cnt; ++j) {
 			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
-			ibs[j + !!preamble_cs] = cs->ib;
+			ibs[j] = cs->ib;

 			if (cs->is_chained) {
 				*cs->ib_size_ptr -= 4;
@@ -771,20 +600,21 @@
 		i += cnt;
 	}

-	if (fence)
-		radv_amdgpu_request_to_fence(ctx, fence, &request);
-
-	radv_assign_last_submit(ctx, &request);
+	if (fence) {
+		fence->context = ctx->ctx;
+		fence->ip_type = request.ip_type;
+		fence->ip_instance = request.ip_instance;
+		fence->ring = request.ring;
+		fence->fence = request.seq_no;
+	}
+	ctx->last_seq_no = request.seq_no;

 	return 0;
 }
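The fallback path submits at most AMDGPU_CS_MAX_IBS_PER_SUBMIT command streams per kernel call, walking the array in chunks. The chunking skeleton on its own (illustrative; the request-building step is elided):

    /* Process n items in batches of at most max_per_submit. */
    static void submit_in_batches(unsigned n, unsigned max_per_submit)
    {
            for (unsigned i = 0; i < n;) {
                    unsigned cnt = n - i < max_per_submit ?
                                   n - i : max_per_submit; /* MIN2 */
                    /* build one amdgpu_cs_request over [i, i + cnt) */
                    i += cnt;
            }
    }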
 static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
-					       int queue_idx,
 					       struct radeon_winsys_cs **cs_array,
 					       unsigned cs_count,
-					       struct radeon_winsys_cs *initial_preamble_cs,
-					       struct radeon_winsys_cs *continue_preamble_cs,
 					       struct radeon_winsys_fence *_fence)
 {
 	int r;
@@ -804,14 +634,10 @@
 	for (unsigned i = 0; i < cs_count;) {
 		struct amdgpu_cs_ib_info ib = {0};
 		struct radeon_winsys_bo *bo = NULL;
-		struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
 		uint32_t *ptr;
 		unsigned cnt = 0;
 		unsigned size = 0;

-		if (preamble_cs)
-			size += preamble_cs->cdw;
-
 		while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
 			size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
 			++cnt;
 		}
@@ -822,11 +648,6 @@
 		bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
 		ptr = ws->buffer_map(bo);

-		if (preamble_cs) {
-			memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
-			ptr += preamble_cs->cdw;
-		}
-
 		for (unsigned j = 0; j < cnt; ++j) {
 			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
 			memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
@@ -843,8 +664,7 @@

 		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
-					       (struct radv_amdgpu_winsys_bo*)bo,
-					       preamble_cs, &bo_list);
+					       (struct radv_amdgpu_winsys_bo*)bo, &bo_list);
 		if (r) {
 			fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n");
 			return r;
@@ -853,8 +673,7 @@
 		ib.size = size;
 		ib.ib_mc_address = ws->buffer_get_va(bo);

-		request.ip_type = cs0->hw_ip;
-		request.ring = queue_idx;
+		request.ip_type = AMDGPU_HW_IP_GFX;
 		request.resources = bo_list;
 		request.number_of_ibs = 1;
 		request.ibs = &ib;
@@ -876,92 +695,35 @@
 		i += cnt;
 	}

-	if (fence)
-		radv_amdgpu_request_to_fence(ctx, fence, &request);
-
-	radv_assign_last_submit(ctx, &request);
+	if (fence) {
+		fence->context = ctx->ctx;
+		fence->ip_type = request.ip_type;
+		fence->ip_instance = request.ip_instance;
+		fence->ring = request.ring;
+		fence->fence = request.seq_no;
+	}
+	ctx->last_seq_no = request.seq_no;

 	return 0;
 }

 static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
-					int queue_idx,
 					struct radeon_winsys_cs **cs_array,
 					unsigned cs_count,
-					struct radeon_winsys_cs *initial_preamble_cs,
-					struct radeon_winsys_cs *continue_preamble_cs,
-					struct radeon_winsys_sem **wait_sem,
-					unsigned wait_sem_count,
-					struct radeon_winsys_sem **signal_sem,
-					unsigned signal_sem_count,
 					bool can_patch,
 					struct radeon_winsys_fence *_fence)
 {
 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
-	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
-	int ret;
-	int i;
-
-	for (i = 0; i < wait_sem_count; i++) {
-		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)wait_sem[i];
-		amdgpu_cs_wait_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
-					 sem);
-	}

 	if (!cs->ws->use_ib_bos) {
-		ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
-							  cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
+		return radv_amdgpu_winsys_cs_submit_sysmem(_ctx, cs_array,
+							   cs_count, _fence);
 	} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
-		ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
-							   cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
+		return radv_amdgpu_winsys_cs_submit_chained(_ctx, cs_array,
							    cs_count, _fence);
 	} else {
-		ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, cs_array,
							    cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
-	}
-
-	for (i = 0; i < signal_sem_count; i++) {
-		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)signal_sem[i];
-		amdgpu_cs_signal_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
					   sem);
-	}
-
-	return ret;
-}
-
-
-static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
-{
-	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
-	void *ret = NULL;
-
-	if (!cs->ib_buffer)
-		return NULL;
-	for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
-		struct radv_amdgpu_winsys_bo *bo;
-
-		bo = (struct radv_amdgpu_winsys_bo*)
-			(i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
-		if (addr >= bo->va && addr - bo->va < bo->size) {
-			if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
-				return (char *)ret + (addr - bo->va);
-		}
-	}
-	return ret;
-}
-
-static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
-				       FILE* file,
-				       uint32_t trace_id)
-{
-	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
-	void *ib = cs->base.buf;
-	int num_dw = cs->base.cdw;
-
-	if (cs->ws->use_ib_bos) {
-		ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address);
-		num_dw = cs->ib.size;
+		return radv_amdgpu_winsys_cs_submit_fallback(_ctx, cs_array,
							     cs_count, _fence);
 	}
-	assert(ib);
-	ac_parse_ib(file, ib, num_dw, trace_id, "main IB", cs->ws->info.chip_class,
-		    radv_amdgpu_winsys_get_cpu_addr, cs);
 }

 static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
@@ -980,7 +742,6 @@ static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_w
 	ctx->ws = ws;
 	return (struct radeon_winsys_ctx *)ctx;
 error_create:
-	FREE(ctx);
 	return NULL;
 }
@@ -991,16 +752,22 @@ static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
 	FREE(ctx);
 }

-static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
-				      enum ring_type ring_type, int ring_index)
+static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx)
 {
 	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
-	int ip_type = ring_to_hw_ip(ring_type);

-	if (ctx->last_submission[ip_type][ring_index].fence) {
+	if (ctx->last_seq_no) {
 		uint32_t expired;
-		int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index],
-						       1000000000ull, 0, &expired);
+		struct amdgpu_cs_fence fence;
+
+		fence.context = ctx->ctx;
+		fence.ip_type = RING_GFX;
+		fence.ip_instance = 0;
+		fence.ring = 0;
+		fence.fence = ctx->last_seq_no;
+
+		int ret = amdgpu_cs_query_fence_status(&fence, 1000000000ull, 0,
						       &expired);

 		if (ret || !expired)
 			return false;
@@ -1009,23 +776,6 @@
 	return true;
 }

-static struct radeon_winsys_sem *radv_amdgpu_create_sem(struct radeon_winsys *_ws)
-{
-	int ret;
-	amdgpu_semaphore_handle sem;
-
-	ret = amdgpu_cs_create_semaphore(&sem);
-	if (ret)
-		return NULL;
-	return (struct radeon_winsys_sem *)sem;
-}
-
-static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem)
-{
-	amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)_sem;
-	amdgpu_cs_destroy_semaphore(sem);
-}
-
 void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
 {
 	ws->base.ctx_create = radv_amdgpu_ctx_create;
@@ -1039,10 +789,7 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
 	ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
 	ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
 	ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
-	ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
 	ws->base.create_fence = radv_amdgpu_create_fence;
 	ws->base.destroy_fence = radv_amdgpu_destroy_fence;
-	ws->base.create_sem = radv_amdgpu_create_sem;
-	ws->base.destroy_sem = radv_amdgpu_destroy_sem;
 	ws->base.fence_wait = radv_amdgpu_fence_wait;
 }
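Throughout this file the new code remembers only a sequence number (last_seq_no) and rebuilds an amdgpu_cs_fence whenever it needs to wait, always assuming the GFX ring. A sketch of that wait built on the same libdrm_amdgpu entry point the diff uses, amdgpu_cs_query_fence_status (the zeroed instance/ring fields mirror this commit's GFX-only assumption):

    #include <amdgpu.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Wait up to one second for the submission identified by seq_no. */
    static bool wait_last_submission(amdgpu_context_handle ctx, uint64_t seq_no)
    {
            struct amdgpu_cs_fence fence = {
                    .context = ctx,
                    .ip_type = AMDGPU_HW_IP_GFX,
                    .ip_instance = 0,
                    .ring = 0,
                    .fence = seq_no,
            };
            uint32_t expired = 0;

            if (amdgpu_cs_query_fence_status(&fence, 1000000000ull, 0, &expired))
                    return false;
            return expired != 0;
    }

Note that the wait_idle hunk above assigns fence.ip_type = RING_GFX, mixing the winsys ring enum into the kernel's IP-type namespace; that appears to rely on both values being 0.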
diff --git a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
index fc6a2c8ef..affee9528 100644
--- a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
+++ b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
@@ -38,14 +38,10 @@
 #include "radv_radeon_winsys.h"
 #include "radv_amdgpu_winsys.h"

-enum {
-	MAX_RINGS_PER_TYPE = 8
-};
-
 struct radv_amdgpu_ctx {
 	struct radv_amdgpu_winsys *ws;
 	amdgpu_context_handle ctx;
-	struct amdgpu_cs_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];
+	uint64_t last_seq_no;
 };

 static inline struct radv_amdgpu_ctx *
diff --git a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
index 511f464df..02aad3c81 100644
--- a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
+++ b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
@@ -140,6 +140,7 @@ ADDR_HANDLE radv_amdgpu_addr_create(struct amdgpu_gpu_info *amdinfo, int family,

 	createFlags.value = 0;
 	createFlags.useTileIndex = 1;
+	createFlags.degradeBaseLevel = 1;

 	addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
 	addrCreateInput.chipFamily = family;
@@ -259,30 +260,6 @@ static int radv_compute_level(ADDR_HANDLE addrlib,
 		}
 	}

-	if (!is_stencil && AddrSurfInfoIn->flags.depth &&
-	    surf_level->mode == RADEON_SURF_MODE_2D && level == 0) {
-		ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
-		ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
-		AddrHtileIn.flags.tcCompatible = AddrSurfInfoIn->flags.tcCompatible;
-		AddrHtileIn.pitch = AddrSurfInfoOut->pitch;
-		AddrHtileIn.height = AddrSurfInfoOut->height;
-		AddrHtileIn.numSlices = AddrSurfInfoOut->depth;
-		AddrHtileIn.blockWidth = ADDR_HTILE_BLOCKSIZE_8;
-		AddrHtileIn.blockHeight = ADDR_HTILE_BLOCKSIZE_8;
-		AddrHtileIn.pTileInfo = AddrSurfInfoOut->pTileInfo;
-		AddrHtileIn.tileIndex = AddrSurfInfoOut->tileIndex;
-		AddrHtileIn.macroModeIndex = AddrSurfInfoOut->macroModeIndex;
-
-		ret = AddrComputeHtileInfo(addrlib,
-					   &AddrHtileIn,
-					   &AddrHtileOut);
-
-		if (ret == ADDR_OK) {
-			surf->htile_size = AddrHtileOut.htileBytes;
-			surf->htile_slice_size = AddrHtileOut.sliceSize;
-			surf->htile_alignment = AddrHtileOut.baseAlign;
-		}
-	}
 	return 0;
 }

@@ -297,19 +274,6 @@ static void radv_set_micro_tile_mode(struct radeon_surf *surf,
 	surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
 }

-static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
-{
-	unsigned index, tileb;
-
-	tileb = 8 * 8 * surf->bpe;
-	tileb = MIN2(surf->tile_split, tileb);
-
-	for (index = 0; tileb > 64; index++)
-		tileb >>= 1;
-
-	assert(index < 16);
-	return index;
-}

 static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
					    struct radeon_surf *surf)
@@ -397,7 +361,7 @@ static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
 	AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP;
 	AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
 	AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
-	AddrSurfInfoIn.flags.opt4Space = 1;
+	AddrSurfInfoIn.flags.degrade4Space = 1;

 	/* DCC notes:
 	 * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
@@ -436,7 +400,7 @@ static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
 		AddrTileInfoIn.macroAspectRatio = surf->mtilea;
 		AddrTileInfoIn.tileSplitBytes = surf->tile_split;
 		AddrTileInfoIn.pipeConfig = surf->pipe_config + 1; /* +1 compared to GB_TILE_MODE */
-		AddrSurfInfoIn.flags.opt4Space = 0;
+		AddrSurfInfoIn.flags.degrade4Space = 0;

 		AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;

 		/* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
@@ -471,22 +435,19 @@ static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
 			AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
 		else
 			AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
-		AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
 	}
 	}

 	surf->bo_size = 0;
 	surf->dcc_size = 0;
 	surf->dcc_alignment = 1;
-	surf->htile_size = surf->htile_slice_size = 0;
-	surf->htile_alignment = 1;

 	/* Calculate texture layout information. */
 	for (level = 0; level <= surf->last_level; level++) {
 		r = radv_compute_level(ws->addrlib, surf, false, level, type, compressed,
 				       &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
 		if (r)
-			break;
+			return r;

 		if (level == 0) {
 			surf->bo_alignment = AddrSurfInfoOut.baseAlign;
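The removed cik_get_macro_tile_index (earlier in this surface diff) derived a macro-tile index by clamping an 8x8-texel tile's byte size to the tile split and halving down to 64 bytes. The arithmetic reproduced as a standalone helper, with one worked case in the comment:

    #include <assert.h>

    /* E.g. bpe = 4, tile_split >= 256: 256 -> 128 -> 64, so index = 2. */
    static unsigned macro_tile_index(unsigned bpe, unsigned tile_split)
    {
            unsigned tileb = 8 * 8 * bpe;
            unsigned index;

            if (tile_split < tileb)
                    tileb = tile_split; /* MIN2 */
            for (index = 0; tileb > 64; index++)
                    tileb >>= 1;
            assert(index < 16);
            return index;
    }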
diff --git a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
index 629da3153..045610072 100644
--- a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
+++ b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
@@ -27,7 +27,6 @@
 #include "radv_amdgpu_winsys.h"
 #include "radv_amdgpu_winsys_public.h"
 #include "radv_amdgpu_surface.h"
-#include "radv_debug.h"
 #include "amdgpu_id.h"
 #include "xf86drm.h"
 #include <stdio.h>
@@ -107,7 +106,6 @@ get_chip_name(enum radeon_family family)
 	case CHIP_FIJI: return "AMD RADV FIJI";
 	case CHIP_POLARIS10: return "AMD RADV POLARIS10";
 	case CHIP_POLARIS11: return "AMD RADV POLARIS11";
-	case CHIP_POLARIS12: return "AMD RADV POLARIS12";
 	case CHIP_STONEY: return "AMD RADV STONEY";
 	default: return "AMD RADV unknown";
 	}
@@ -118,16 +116,15 @@ static bool
 do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
 {
 	struct amdgpu_buffer_size_alignments alignment_info = {};
-	struct amdgpu_heap_info vram, visible_vram, gtt;
+	struct amdgpu_heap_info vram, gtt;
 	struct drm_amdgpu_info_hw_ip dma = {};
-	struct drm_amdgpu_info_hw_ip compute = {};
 	drmDevicePtr devinfo;
 	int r;
 	int i, j;

 	/* Get PCI info. */
-	r = drmGetDevice2(fd, 0, &devinfo);
+	r = drmGetDevice(fd, &devinfo);
 	if (r) {
-		fprintf(stderr, "amdgpu: drmGetDevice2 failed.\n");
+		fprintf(stderr, "amdgpu: drmGetDevice failed.\n");
 		goto fail;
 	}
 	ws->info.pci_domain = devinfo->businfo.pci->domain;
@@ -155,13 +152,6 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
 		goto fail;
 	}

-	r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM,
-				   AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &visible_vram);
-	if (r) {
-		fprintf(stderr, "amdgpu: amdgpu_query_heap_info(visible_vram) failed.\n");
-		goto fail;
-	}
-
 	r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &gtt);
 	if (r) {
 		fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
@@ -173,12 +163,6 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
 		fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n");
 		goto fail;
 	}
-
-	r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_COMPUTE, 0, &compute);
-	if (r) {
-		fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(compute) failed.\n");
-		goto fail;
-	}

 	ws->info.pci_id = ws->amdinfo.asic_id; /* TODO: is this correct? */
 	ws->info.vce_harvest_config = ws->amdinfo.vce_harvest_config;
@@ -272,10 +256,6 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
 		ws->family = FAMILY_VI;
 		ws->rev_id = VI_POLARIS11_M_A0;
 		break;
-	case CHIP_POLARIS12:
-		ws->family = FAMILY_VI;
-		ws->rev_id = VI_POLARIS12_V_A0;
-		break;
 	default:
 		fprintf(stderr, "amdgpu: Unknown family.\n");
 		goto fail;
@@ -286,15 +266,10 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
 		fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
 		goto fail;
 	}
-
-	assert(util_is_power_of_two(dma.available_rings + 1));
-	assert(util_is_power_of_two(compute.available_rings + 1));
-
 	/* Set hardware information. */
 	ws->info.name = get_chip_name(ws->info.family);
 	ws->info.gart_size = gtt.heap_size;
 	ws->info.vram_size = vram.heap_size;
-	ws->info.visible_vram_size = visible_vram.heap_size;
 	/* convert the shader clock from KHz to MHz */
 	ws->info.max_shader_clock = ws->amdinfo.max_engine_clk / 1000;
 	ws->info.max_se = ws->amdinfo.num_shader_engines;
@@ -307,10 +282,7 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
 	ws->info.num_tile_pipes = radv_cik_get_num_tile_pipes(&ws->amdinfo);
 	ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 0x7);
 	ws->info.has_virtual_memory = TRUE;
-	ws->info.sdma_rings = MIN2(util_bitcount(dma.available_rings),
-				   MAX_RINGS_PER_TYPE);
-	ws->info.compute_rings = MIN2(util_bitcount(compute.available_rings),
-				      MAX_RINGS_PER_TYPE);
+	ws->info.has_sdma = dma.available_rings != 0;

 	/* Get the number of good compute units. */
 	ws->info.num_good_compute_units = 0;
@@ -353,7 +325,7 @@ static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
 }

 struct radeon_winsys *
-radv_amdgpu_winsys_create(int fd, uint32_t debug_flags)
+radv_amdgpu_winsys_create(int fd)
 {
 	uint32_t drm_major, drm_minor, r;
 	amdgpu_device_handle dev;
@@ -373,10 +345,7 @@ radv_amdgpu_winsys_create(int fd, uint32_t debug_flags)
 	if (!do_winsys_init(ws, fd))
 		goto winsys_fail;

-	ws->debug_all_bos = !!(debug_flags & RADV_DEBUG_ALL_BOS);
-	if (debug_flags & RADV_DEBUG_NO_IBS)
-		ws->use_ib_bos = false;
-
+	ws->debug_all_bos = getenv("RADV_DEBUG_ALL_BOS") ? true : false;
 	LIST_INITHEAD(&ws->global_bo_list);
 	pthread_mutex_init(&ws->global_bo_list_lock, NULL);
 	ws->base.query_info = radv_amdgpu_winsys_query_info;
diff --git a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h
index d5d0ff52c..208561db9 100644
--- a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h
+++ b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h
@@ -29,6 +29,6 @@
 #ifndef RADV_AMDGPU_WINSYS_PUBLIC_H
 #define RADV_AMDGPU_WINSYS_PUBLIC_H

-struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint32_t debug_flags);
+struct radeon_winsys *radv_amdgpu_winsys_create(int fd);

 #endif /* RADV_AMDGPU_WINSYS_PUBLIC_H */
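The last winsys hunk replaces the debug_flags parameter with an environment lookup, so enabling the global BO list no longer needs driver plumbing. The lookup pattern by itself (getenv is standard C; the variable name is taken from the hunk):

    #include <stdbool.h>
    #include <stdlib.h>

    /* True when the user exported RADV_DEBUG_ALL_BOS (any value counts). */
    static bool debug_all_bos_enabled(void)
    {
            return getenv("RADV_DEBUG_ALL_BOS") != NULL;
    }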