summaryrefslogtreecommitdiff
path: root/lib/mesa/src/amd/vulkan
diff options
context:
space:
mode:
authorJonathan Gray <jsg@cvs.openbsd.org>2016-12-11 08:37:01 +0000
committerJonathan Gray <jsg@cvs.openbsd.org>2016-12-11 08:37:01 +0000
commite4c140c4d4d00c16d99b15ee6677cbd2e3364031 (patch)
tree96364a9f799341ef8dff3d1534f1de0eb3a559b1 /lib/mesa/src/amd/vulkan
parent10010c14c68222d4056694bf3643ee969d18cd4f (diff)
Import Mesa 13.0.2
Diffstat (limited to 'lib/mesa/src/amd/vulkan')
-rw-r--r--lib/mesa/src/amd/vulkan/Makefile.am70
-rw-r--r--lib/mesa/src/amd/vulkan/Makefile.sources4
-rw-r--r--lib/mesa/src/amd/vulkan/radv_cmd_buffer.c1698
-rw-r--r--lib/mesa/src/amd/vulkan/radv_cs.h4
-rw-r--r--lib/mesa/src/amd/vulkan/radv_descriptor_set.c449
-rw-r--r--lib/mesa/src/amd/vulkan/radv_descriptor_set.h30
-rw-r--r--lib/mesa/src/amd/vulkan/radv_device.c1931
-rw-r--r--lib/mesa/src/amd/vulkan/radv_entrypoints.c344
-rw-r--r--lib/mesa/src/amd/vulkan/radv_entrypoints.h166
-rw-r--r--lib/mesa/src/amd/vulkan/radv_entrypoints_gen.py147
-rw-r--r--lib/mesa/src/amd/vulkan/radv_formats.c105
-rw-r--r--lib/mesa/src/amd/vulkan/radv_image.c192
-rw-r--r--lib/mesa/src/amd/vulkan/radv_meta.c16
-rw-r--r--lib/mesa/src/amd/vulkan/radv_meta.h32
-rw-r--r--lib/mesa/src/amd/vulkan/radv_meta_blit.c284
-rw-r--r--lib/mesa/src/amd/vulkan/radv_meta_blit2d.c170
-rw-r--r--lib/mesa/src/amd/vulkan/radv_meta_buffer.c163
-rw-r--r--lib/mesa/src/amd/vulkan/radv_meta_bufimage.c985
-rw-r--r--lib/mesa/src/amd/vulkan/radv_meta_clear.c608
-rw-r--r--lib/mesa/src/amd/vulkan/radv_meta_copy.c137
-rw-r--r--lib/mesa/src/amd/vulkan/radv_meta_decompress.c51
-rw-r--r--lib/mesa/src/amd/vulkan/radv_meta_fast_clear.c116
-rw-r--r--lib/mesa/src/amd/vulkan/radv_meta_resolve.c107
-rw-r--r--lib/mesa/src/amd/vulkan/radv_meta_resolve_cs.c112
-rw-r--r--lib/mesa/src/amd/vulkan/radv_pipeline.c1118
-rw-r--r--lib/mesa/src/amd/vulkan/radv_pipeline_cache.c45
-rw-r--r--lib/mesa/src/amd/vulkan/radv_private.h390
-rw-r--r--lib/mesa/src/amd/vulkan/radv_query.c1009
-rw-r--r--lib/mesa/src/amd/vulkan/radv_radeon_winsys.h33
-rw-r--r--lib/mesa/src/amd/vulkan/radv_util.c45
-rw-r--r--lib/mesa/src/amd/vulkan/radv_wsi.c213
-rw-r--r--lib/mesa/src/amd/vulkan/radv_wsi_wayland.c3
-rw-r--r--lib/mesa/src/amd/vulkan/radv_wsi_x11.c8
-rw-r--r--lib/mesa/src/amd/vulkan/si_cmd_buffer.c495
-rw-r--r--lib/mesa/src/amd/vulkan/vk_format.h13
-rwxr-xr-xlib/mesa/src/amd/vulkan/vk_format_parse.py1
-rwxr-xr-xlib/mesa/src/amd/vulkan/vk_format_table.py1
-rw-r--r--lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c249
-rw-r--r--lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h36
-rw-r--r--lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c365
-rw-r--r--lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h6
-rw-r--r--lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c47
-rw-r--r--lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c43
-rw-r--r--lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h2
44 files changed, 2853 insertions, 9190 deletions
diff --git a/lib/mesa/src/amd/vulkan/Makefile.am b/lib/mesa/src/amd/vulkan/Makefile.am
index a645432e7..c559a9503 100644
--- a/lib/mesa/src/amd/vulkan/Makefile.am
+++ b/lib/mesa/src/amd/vulkan/Makefile.am
@@ -21,7 +21,9 @@
include Makefile.sources
-noinst_HEADERS = \
+vulkan_includedir = $(includedir)/vulkan
+
+vulkan_include_HEADERS = \
$(top_srcdir)/include/vulkan/vk_platform.h \
$(top_srcdir)/include/vulkan/vulkan.h
@@ -30,12 +32,13 @@ lib_LTLIBRARIES = libvulkan_radeon.la
# The gallium includes are for the util/u_math.h include from main/macros.h
AM_CPPFLAGS = \
+ $(AMDGPU_CFLAGS) \
+ $(VALGRIND_CFLAGS) \
+ $(DEFINES) \
-I$(top_srcdir)/include \
-I$(top_builddir)/src \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/vulkan/wsi \
- -I$(top_builddir)/src/vulkan/util \
- -I$(top_srcdir)/src/vulkan/util \
-I$(top_srcdir)/src/amd \
-I$(top_srcdir)/src/amd/common \
-I$(top_builddir)/src/compiler \
@@ -45,10 +48,7 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/src/mesa \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/gallium/auxiliary \
- -I$(top_srcdir)/src/gallium/include \
- $(AMDGPU_CFLAGS) \
- $(VALGRIND_CFLAGS) \
- $(DEFINES)
+ -I$(top_srcdir)/src/gallium/include
AM_CFLAGS = \
$(VISIBILITY_CFLAGS) \
@@ -59,22 +59,8 @@ VULKAN_SOURCES = \
$(VULKAN_GENERATED_FILES) \
$(VULKAN_FILES)
-VULKAN_LIB_DEPS = \
- libvulkan_common.la \
- $(top_builddir)/src/vulkan/libvulkan_util.la \
- $(top_builddir)/src/vulkan/libvulkan_wsi.la \
- $(top_builddir)/src/amd/common/libamd_common.la \
- $(top_builddir)/src/amd/addrlib/libamdgpu_addrlib.la \
- $(top_builddir)/src/compiler/nir/libnir.la \
- $(top_builddir)/src/util/libmesautil.la \
- $(LLVM_LIBS) \
- $(LIBELF_LIBS) \
- $(PTHREAD_LIBS) \
- $(AMDGPU_LIBS) \
- $(LIBDRM_LIBS) \
- $(PTHREAD_LIBS) \
- $(DLOPEN_LIBS) \
- -lm
+VULKAN_LIB_DEPS =
+
if HAVE_PLATFORM_X11
AM_CPPFLAGS += \
@@ -84,37 +70,61 @@ AM_CPPFLAGS += \
VULKAN_SOURCES += $(VULKAN_WSI_X11_FILES)
-VULKAN_LIB_DEPS += $(XCB_DRI3_LIBS)
+# FIXME: Use pkg-config for X11-xcb ldflags.
+VULKAN_LIB_DEPS += $(XCB_DRI3_LIBS) -lX11-xcb
endif
if HAVE_PLATFORM_WAYLAND
AM_CPPFLAGS += \
+ -I$(top_builddir)/src/egl/wayland/wayland-drm \
+ -I$(top_srcdir)/src/egl/wayland/wayland-drm \
$(WAYLAND_CFLAGS) \
-DVK_USE_PLATFORM_WAYLAND_KHR
VULKAN_SOURCES += $(VULKAN_WSI_WAYLAND_FILES)
VULKAN_LIB_DEPS += \
+ $(top_builddir)/src/egl/wayland/wayland-drm/libwayland-drm.la \
$(WAYLAND_LIBS)
endif
noinst_LTLIBRARIES = libvulkan_common.la
libvulkan_common_la_SOURCES = $(VULKAN_SOURCES)
+VULKAN_LIB_DEPS += \
+ libvulkan_common.la \
+ $(top_builddir)/src/vulkan/wsi/libvulkan_wsi.la \
+ $(top_builddir)/src/amd/common/libamd_common.la \
+ $(top_builddir)/src/amd/addrlib/libamdgpu_addrlib.la \
+ $(top_builddir)/src/compiler/nir/libnir.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(LLVM_LIBS) \
+ $(LIBELF_LIBS) \
+ $(PTHREAD_LIBS) \
+ $(AMDGPU_LIBS) \
+ $(LIBDRM_LIBS) \
+ $(PTHREAD_LIBS) \
+ $(DLOPEN_LIBS) \
+ -lm
+
nodist_EXTRA_libvulkan_radeon_la_SOURCES = dummy.cpp
libvulkan_radeon_la_SOURCES = $(VULKAN_GEM_FILES)
-vulkan_api_xml = $(top_srcdir)/src/vulkan/registry/vk.xml
-
-radv_entrypoints.h : radv_entrypoints_gen.py $(vulkan_api_xml)
- $(AM_V_GEN) cat $(vulkan_api_xml) |\
+radv_entrypoints.h : radv_entrypoints_gen.py $(vulkan_include_HEADERS)
+ $(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py header > $@
-radv_entrypoints.c : radv_entrypoints_gen.py $(vulkan_api_xml)
- $(AM_V_GEN) cat $(vulkan_api_xml) |\
+radv_entrypoints.c : radv_entrypoints_gen.py $(vulkan_include_HEADERS)
+ $(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py code > $@
+.PHONY: radv_timestamp.h
+
+radv_timestamp.h:
+ @echo "Updating radv_timestamp.h"
+ $(AM_V_GEN) echo "#define RADV_TIMESTAMP \"$(TIMESTAMP_CMD)\"" > $@
+
vk_format_table.c: vk_format_table.py \
vk_format_parse.py \
vk_format_layout.csv
diff --git a/lib/mesa/src/amd/vulkan/Makefile.sources b/lib/mesa/src/amd/vulkan/Makefile.sources
index 489695215..d163b9807 100644
--- a/lib/mesa/src/amd/vulkan/Makefile.sources
+++ b/lib/mesa/src/amd/vulkan/Makefile.sources
@@ -33,7 +33,6 @@ RADV_WS_AMDGPU_FILES := \
VULKAN_FILES := \
radv_cmd_buffer.c \
radv_cs.h \
- radv_debug.h \
radv_device.c \
radv_descriptor_set.c \
radv_descriptor_set.h \
@@ -73,5 +72,6 @@ VULKAN_WSI_X11_FILES := \
VULKAN_GENERATED_FILES := \
radv_entrypoints.c \
- radv_entrypoints.h
+ radv_entrypoints.h \
+ radv_timestamp.h
diff --git a/lib/mesa/src/amd/vulkan/radv_cmd_buffer.c b/lib/mesa/src/amd/vulkan/radv_cmd_buffer.c
index fd155411f..9517e7a13 100644
--- a/lib/mesa/src/amd/vulkan/radv_cmd_buffer.c
+++ b/lib/mesa/src/amd/vulkan/radv_cmd_buffer.c
@@ -32,15 +32,11 @@
#include "vk_format.h"
#include "radv_meta.h"
-#include "ac_debug.h"
-
static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
VkImageLayout src_layout,
VkImageLayout dst_layout,
- uint32_t src_family,
- uint32_t dst_family,
- const VkImageSubresourceRange *range,
+ VkImageSubresourceRange range,
VkImageAspectFlags pending_clears);
const struct radv_dynamic_state default_dynamic_state = {
@@ -114,25 +110,6 @@ radv_dynamic_state_copy(struct radv_dynamic_state *dest,
dest->stencil_reference = src->stencil_reference;
}
-bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer)
-{
- return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
- cmd_buffer->device->physical_device->rad_info.chip_class >= CIK;
-}
-
-enum ring_type radv_queue_family_to_ring(int f) {
- switch (f) {
- case RADV_QUEUE_GENERAL:
- return RING_GFX;
- case RADV_QUEUE_COMPUTE:
- return RING_COMPUTE;
- case RADV_QUEUE_TRANSFER:
- return RING_DMA;
- default:
- unreachable("Unknown queue family");
- }
-}
-
static VkResult radv_create_cmd_buffer(
struct radv_device * device,
struct radv_cmd_pool * pool,
@@ -141,7 +118,7 @@ static VkResult radv_create_cmd_buffer(
{
struct radv_cmd_buffer *cmd_buffer;
VkResult result;
- unsigned ring;
+
cmd_buffer = vk_alloc(&pool->alloc, sizeof(*cmd_buffer), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (cmd_buffer == NULL)
@@ -155,19 +132,14 @@ static VkResult radv_create_cmd_buffer(
if (pool) {
list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
- cmd_buffer->queue_family_index = pool->queue_family_index;
-
} else {
/* Init the pool_link so we can safefly call list_del when we destroy
* the command buffer
*/
list_inithead(&cmd_buffer->pool_link);
- cmd_buffer->queue_family_index = RADV_QUEUE_GENERAL;
}
- ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
-
- cmd_buffer->cs = device->ws->cs_create(device->ws, ring);
+ cmd_buffer->cs = device->ws->cs_create(device->ws, RING_GFX);
if (!cmd_buffer->cs) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
@@ -187,54 +159,6 @@ fail:
return result;
}
-static void
-radv_cmd_buffer_destroy(struct radv_cmd_buffer *cmd_buffer)
-{
- list_del(&cmd_buffer->pool_link);
-
- list_for_each_entry_safe(struct radv_cmd_buffer_upload, up,
- &cmd_buffer->upload.list, list) {
- cmd_buffer->device->ws->buffer_destroy(up->upload_bo);
- list_del(&up->list);
- free(up);
- }
-
- if (cmd_buffer->upload.upload_bo)
- cmd_buffer->device->ws->buffer_destroy(cmd_buffer->upload.upload_bo);
- cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs);
- free(cmd_buffer->push_descriptors.set.mapped_ptr);
- vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
-}
-
-static void radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
-{
-
- cmd_buffer->device->ws->cs_reset(cmd_buffer->cs);
-
- list_for_each_entry_safe(struct radv_cmd_buffer_upload, up,
- &cmd_buffer->upload.list, list) {
- cmd_buffer->device->ws->buffer_destroy(up->upload_bo);
- list_del(&up->list);
- free(up);
- }
-
- cmd_buffer->scratch_size_needed = 0;
- cmd_buffer->compute_scratch_size_needed = 0;
- cmd_buffer->esgs_ring_size_needed = 0;
- cmd_buffer->gsvs_ring_size_needed = 0;
- cmd_buffer->tess_rings_needed = false;
- cmd_buffer->sample_positions_needed = false;
-
- if (cmd_buffer->upload.upload_bo)
- cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
- cmd_buffer->upload.upload_bo, 8);
- cmd_buffer->upload.offset = 0;
-
- cmd_buffer->record_fail = false;
-
- cmd_buffer->ring_offsets_idx = -1;
-}
-
static bool
radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer,
uint64_t min_needed)
@@ -322,32 +246,6 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
return true;
}
-void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
-{
- struct radv_device *device = cmd_buffer->device;
- struct radeon_winsys_cs *cs = cmd_buffer->cs;
- uint64_t va;
-
- if (!device->trace_bo)
- return;
-
- va = device->ws->buffer_get_va(device->trace_bo);
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 7);
-
- ++cmd_buffer->state.trace_id;
- device->ws->cs_add_buffer(cs, device->trace_bo, 8);
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_ME));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, cmd_buffer->state.trace_id);
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
-}
-
static void
radv_emit_graphics_blend_state(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline)
@@ -378,58 +276,6 @@ static unsigned radv_pack_float_12p4(float x)
x >= 4096 ? 0xffff : x * 16;
}
-static uint32_t
-shader_stage_to_user_data_0(gl_shader_stage stage, bool has_gs, bool has_tess)
-{
- switch (stage) {
- case MESA_SHADER_FRAGMENT:
- return R_00B030_SPI_SHADER_USER_DATA_PS_0;
- case MESA_SHADER_VERTEX:
- if (has_tess)
- return R_00B530_SPI_SHADER_USER_DATA_LS_0;
- else
- return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : R_00B130_SPI_SHADER_USER_DATA_VS_0;
- case MESA_SHADER_GEOMETRY:
- return R_00B230_SPI_SHADER_USER_DATA_GS_0;
- case MESA_SHADER_COMPUTE:
- return R_00B900_COMPUTE_USER_DATA_0;
- case MESA_SHADER_TESS_CTRL:
- return R_00B430_SPI_SHADER_USER_DATA_HS_0;
- case MESA_SHADER_TESS_EVAL:
- if (has_gs)
- return R_00B330_SPI_SHADER_USER_DATA_ES_0;
- else
- return R_00B130_SPI_SHADER_USER_DATA_VS_0;
- default:
- unreachable("unknown shader");
- }
-}
-
-static struct ac_userdata_info *
-radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
- gl_shader_stage stage,
- int idx)
-{
- return &pipeline->shaders[stage]->info.user_sgprs_locs.shader_data[idx];
-}
-
-static void
-radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline,
- gl_shader_stage stage,
- int idx, uint64_t va)
-{
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
- uint32_t base_reg = shader_stage_to_user_data_0(stage, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
- if (loc->sgpr_idx == -1)
- return;
- assert(loc->num_sgprs == 2);
- assert(!loc->indirect);
- radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 2);
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
-}
-
static void
radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline)
@@ -442,9 +288,6 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_mask[0]);
radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_mask[1]);
- radeon_set_context_reg(cmd_buffer->cs, CM_R_028804_DB_EQAA, ms->db_eqaa);
- radeon_set_context_reg(cmd_buffer->cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1);
-
if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)
return;
@@ -452,37 +295,41 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cmd_buffer->cs, ms->pa_sc_line_cntl);
radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_config);
- radv_cayman_emit_msaa_sample_locs(cmd_buffer->cs, num_samples);
+ radeon_set_context_reg(cmd_buffer->cs, CM_R_028804_DB_EQAA, ms->db_eqaa);
+ radeon_set_context_reg(cmd_buffer->cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1);
- if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_positions) {
- uint32_t offset;
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_FRAGMENT, AC_UD_PS_SAMPLE_POS_OFFSET);
- uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_FRAGMENT, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
- if (loc->sgpr_idx == -1)
- return;
- assert(loc->num_sgprs == 1);
- assert(!loc->indirect);
- switch (num_samples) {
- default:
- offset = 0;
- break;
- case 2:
- offset = 1;
- break;
- case 4:
- offset = 3;
- break;
- case 8:
- offset = 7;
- break;
- case 16:
- offset = 15;
- break;
- }
+ radv_cayman_emit_msaa_sample_locs(cmd_buffer->cs, num_samples);
- radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, offset);
- cmd_buffer->sample_positions_needed = true;
+ uint32_t samples_offset;
+ void *samples_ptr;
+ void *src;
+ radv_cmd_buffer_upload_alloc(cmd_buffer, num_samples * 4 * 2, 256, &samples_offset,
+ &samples_ptr);
+ switch (num_samples) {
+ case 1:
+ src = cmd_buffer->device->sample_locations_1x;
+ break;
+ case 2:
+ src = cmd_buffer->device->sample_locations_2x;
+ break;
+ case 4:
+ src = cmd_buffer->device->sample_locations_4x;
+ break;
+ case 8:
+ src = cmd_buffer->device->sample_locations_8x;
+ break;
+ case 16:
+ src = cmd_buffer->device->sample_locations_16x;
+ break;
}
+ memcpy(samples_ptr, src, num_samples * 4 * 2);
+
+ uint64_t va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
+ va += samples_offset;
+
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 + AC_USERDATA_PS_SAMPLE_POS * 4, 2);
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
}
static void
@@ -498,8 +345,7 @@ radv_emit_graphics_raster_state(struct radv_cmd_buffer *cmd_buffer,
raster->spi_interp_control);
radeon_set_context_reg_seq(cmd_buffer->cs, R_028A00_PA_SU_POINT_SIZE, 2);
- unsigned tmp = (unsigned)(1.0 * 8.0);
- radeon_emit(cmd_buffer->cs, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
+ radeon_emit(cmd_buffer->cs, 0);
radeon_emit(cmd_buffer->cs, S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) |
S_028A04_MAX_SIZE(radv_pack_float_12p4(8192/2))); /* R_028A04_PA_SU_POINT_MINMAX */
@@ -511,39 +357,47 @@ radv_emit_graphics_raster_state(struct radv_cmd_buffer *cmd_buffer,
}
static void
-radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline,
- struct radv_shader_variant *shader,
- struct ac_vs_output_info *outinfo)
+radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_pipeline *pipeline)
{
struct radeon_winsys *ws = cmd_buffer->device->ws;
- uint64_t va = ws->buffer_get_va(shader->bo);
+ struct radv_shader_variant *vs;
+ uint64_t va;
unsigned export_count;
+ unsigned clip_dist_mask, cull_dist_mask, total_mask;
+
+ assert (pipeline->shaders[MESA_SHADER_VERTEX]);
+
+ vs = pipeline->shaders[MESA_SHADER_VERTEX];
+ va = ws->buffer_get_va(vs->bo);
+ ws->cs_add_buffer(cmd_buffer->cs, vs->bo, 8);
- ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
+ clip_dist_mask = vs->info.vs.clip_dist_mask;
+ cull_dist_mask = vs->info.vs.cull_dist_mask;
+ total_mask = clip_dist_mask | cull_dist_mask;
+ radeon_set_context_reg(cmd_buffer->cs, R_028A40_VGT_GS_MODE, 0);
+ radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, 0);
- export_count = MAX2(1, outinfo->param_exports);
+ export_count = MAX2(1, vs->info.vs.param_exports);
radeon_set_context_reg(cmd_buffer->cs, R_0286C4_SPI_VS_OUT_CONFIG,
S_0286C4_VS_EXPORT_COUNT(export_count - 1));
-
radeon_set_context_reg(cmd_buffer->cs, R_02870C_SPI_SHADER_POS_FORMAT,
S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
- S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ?
+ S_02870C_POS1_EXPORT_FORMAT(vs->info.vs.pos_exports > 1 ?
V_02870C_SPI_SHADER_4COMP :
V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ?
+ S_02870C_POS2_EXPORT_FORMAT(vs->info.vs.pos_exports > 2 ?
V_02870C_SPI_SHADER_4COMP :
V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ?
+ S_02870C_POS3_EXPORT_FORMAT(vs->info.vs.pos_exports > 3 ?
V_02870C_SPI_SHADER_4COMP :
V_02870C_SPI_SHADER_NONE));
-
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
radeon_emit(cmd_buffer->cs, va >> 8);
radeon_emit(cmd_buffer->cs, va >> 40);
- radeon_emit(cmd_buffer->cs, shader->rsrc1);
- radeon_emit(cmd_buffer->cs, shader->rsrc2);
+ radeon_emit(cmd_buffer->cs, vs->rsrc1);
+ radeon_emit(cmd_buffer->cs, vs->rsrc2);
radeon_set_context_reg(cmd_buffer->cs, R_028818_PA_CL_VTE_CNTL,
S_028818_VTX_W0_FMT(1) |
@@ -551,236 +405,34 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
-
radeon_set_context_reg(cmd_buffer->cs, R_02881C_PA_CL_VS_OUT_CNTL,
- pipeline->graphics.pa_cl_vs_out_cntl);
-
- radeon_set_context_reg(cmd_buffer->cs, R_028AB4_VGT_REUSE_OFF,
- S_028AB4_REUSE_OFF(outinfo->writes_viewport_index));
-}
-
-static void
-radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer,
- struct radv_shader_variant *shader,
- struct ac_es_output_info *outinfo)
-{
- struct radeon_winsys *ws = cmd_buffer->device->ws;
- uint64_t va = ws->buffer_get_va(shader->bo);
-
- ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
-
- radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
- outinfo->esgs_itemsize / 4);
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
- radeon_emit(cmd_buffer->cs, va >> 8);
- radeon_emit(cmd_buffer->cs, va >> 40);
- radeon_emit(cmd_buffer->cs, shader->rsrc1);
- radeon_emit(cmd_buffer->cs, shader->rsrc2);
-}
-
-static void
-radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer,
- struct radv_shader_variant *shader)
-{
- struct radeon_winsys *ws = cmd_buffer->device->ws;
- uint64_t va = ws->buffer_get_va(shader->bo);
- uint32_t rsrc2 = shader->rsrc2;
-
- ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
-
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
- radeon_emit(cmd_buffer->cs, va >> 8);
- radeon_emit(cmd_buffer->cs, va >> 40);
-
- rsrc2 |= S_00B52C_LDS_SIZE(cmd_buffer->state.pipeline->graphics.tess.lds_size);
- if (cmd_buffer->device->physical_device->rad_info.chip_class == CIK &&
- cmd_buffer->device->physical_device->rad_info.family != CHIP_HAWAII)
- radeon_set_sh_reg(cmd_buffer->cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2);
-
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
- radeon_emit(cmd_buffer->cs, shader->rsrc1);
- radeon_emit(cmd_buffer->cs, rsrc2);
-}
-
-static void
-radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_shader_variant *shader)
-{
- struct radeon_winsys *ws = cmd_buffer->device->ws;
- uint64_t va = ws->buffer_get_va(shader->bo);
-
- ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
-
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4);
- radeon_emit(cmd_buffer->cs, va >> 8);
- radeon_emit(cmd_buffer->cs, va >> 40);
- radeon_emit(cmd_buffer->cs, shader->rsrc1);
- radeon_emit(cmd_buffer->cs, shader->rsrc2);
-}
-
-static void
-radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
-{
- struct radv_shader_variant *vs;
-
- assert (pipeline->shaders[MESA_SHADER_VERTEX]);
-
- vs = pipeline->shaders[MESA_SHADER_VERTEX];
-
- if (vs->info.vs.as_ls)
- radv_emit_hw_ls(cmd_buffer, vs);
- else if (vs->info.vs.as_es)
- radv_emit_hw_es(cmd_buffer, vs, &vs->info.vs.es_info);
- else
- radv_emit_hw_vs(cmd_buffer, pipeline, vs, &vs->info.vs.outinfo);
-
- radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, 0);
-}
-
-
-static void
-radv_emit_tess_shaders(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
-{
- if (!radv_pipeline_has_tess(pipeline))
- return;
-
- struct radv_shader_variant *tes, *tcs;
-
- tcs = pipeline->shaders[MESA_SHADER_TESS_CTRL];
- tes = pipeline->shaders[MESA_SHADER_TESS_EVAL];
-
- if (tes->info.tes.as_es)
- radv_emit_hw_es(cmd_buffer, tes, &tes->info.tes.es_info);
- else
- radv_emit_hw_vs(cmd_buffer, pipeline, tes, &tes->info.tes.outinfo);
+ S_02881C_USE_VTX_POINT_SIZE(vs->info.vs.writes_pointsize) |
+ S_02881C_VS_OUT_MISC_VEC_ENA(vs->info.vs.writes_pointsize) |
+ S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
+ S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
+ pipeline->graphics.raster.pa_cl_vs_out_cntl |
+ cull_dist_mask << 8 |
+ clip_dist_mask);
- radv_emit_hw_hs(cmd_buffer, tcs);
-
- radeon_set_context_reg(cmd_buffer->cs, R_028B6C_VGT_TF_PARAM,
- pipeline->graphics.tess.tf_param);
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
- radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2,
- pipeline->graphics.tess.ls_hs_config);
- else
- radeon_set_context_reg(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG,
- pipeline->graphics.tess.ls_hs_config);
-
- struct ac_userdata_info *loc;
-
- loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_CTRL, AC_UD_TCS_OFFCHIP_LAYOUT);
- if (loc->sgpr_idx != -1) {
- uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_TESS_CTRL, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
- assert(loc->num_sgprs == 4);
- assert(!loc->indirect);
- radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 4);
- radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.offchip_layout);
- radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.tcs_out_offsets);
- radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.tcs_out_layout |
- pipeline->graphics.tess.num_tcs_input_cp << 26);
- radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.tcs_in_layout);
- }
-
- loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_EVAL, AC_UD_TES_OFFCHIP_LAYOUT);
- if (loc->sgpr_idx != -1) {
- uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_TESS_EVAL, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
- assert(loc->num_sgprs == 1);
- assert(!loc->indirect);
-
- radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
- pipeline->graphics.tess.offchip_layout);
- }
-
- loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX, AC_UD_VS_LS_TCS_IN_LAYOUT);
- if (loc->sgpr_idx != -1) {
- uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
- assert(loc->num_sgprs == 1);
- assert(!loc->indirect);
-
- radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
- pipeline->graphics.tess.tcs_in_layout);
- }
}
-static void
-radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
-{
- struct radeon_winsys *ws = cmd_buffer->device->ws;
- struct radv_shader_variant *gs;
- uint64_t va;
-
- radeon_set_context_reg(cmd_buffer->cs, R_028A40_VGT_GS_MODE, pipeline->graphics.vgt_gs_mode);
-
- gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
- if (!gs)
- return;
-
- uint32_t gsvs_itemsize = gs->info.gs.max_gsvs_emit_size >> 2;
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3);
- radeon_emit(cmd_buffer->cs, gsvs_itemsize);
- radeon_emit(cmd_buffer->cs, gsvs_itemsize);
- radeon_emit(cmd_buffer->cs, gsvs_itemsize);
-
- radeon_set_context_reg(cmd_buffer->cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);
-
- radeon_set_context_reg(cmd_buffer->cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out);
-
- uint32_t gs_vert_itemsize = gs->info.gs.gsvs_vertex_size;
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4);
- radeon_emit(cmd_buffer->cs, gs_vert_itemsize >> 2);
- radeon_emit(cmd_buffer->cs, 0);
- radeon_emit(cmd_buffer->cs, 0);
- radeon_emit(cmd_buffer->cs, 0);
-
- uint32_t gs_num_invocations = gs->info.gs.invocations;
- radeon_set_context_reg(cmd_buffer->cs, R_028B90_VGT_GS_INSTANCE_CNT,
- S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
- S_028B90_ENABLE(gs_num_invocations > 0));
-
- va = ws->buffer_get_va(gs->bo);
- ws->cs_add_buffer(cmd_buffer->cs, gs->bo, 8);
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
- radeon_emit(cmd_buffer->cs, va >> 8);
- radeon_emit(cmd_buffer->cs, va >> 40);
- radeon_emit(cmd_buffer->cs, gs->rsrc1);
- radeon_emit(cmd_buffer->cs, gs->rsrc2);
-
- radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader, &pipeline->gs_copy_shader->info.vs.outinfo);
-
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
- AC_UD_GS_VS_RING_STRIDE_ENTRIES);
- if (loc->sgpr_idx != -1) {
- uint32_t stride = gs->info.gs.max_gsvs_emit_size;
- uint32_t num_entries = 64;
- bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= VI;
-
- if (is_vi)
- num_entries *= stride;
- stride = S_008F04_STRIDE(stride);
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B230_SPI_SHADER_USER_DATA_GS_0 + loc->sgpr_idx * 4, 2);
- radeon_emit(cmd_buffer->cs, stride);
- radeon_emit(cmd_buffer->cs, num_entries);
- }
-}
static void
radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline)
{
struct radeon_winsys *ws = cmd_buffer->device->ws;
- struct radv_shader_variant *ps;
+ struct radv_shader_variant *ps, *vs;
uint64_t va;
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
struct radv_blend_state *blend = &pipeline->graphics.blend;
+ unsigned ps_offset = 0;
+ unsigned z_order;
assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);
ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
-
+ vs = pipeline->shaders[MESA_SHADER_VERTEX];
va = ws->buffer_get_va(ps->bo);
ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8);
@@ -790,8 +442,20 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cmd_buffer->cs, ps->rsrc1);
radeon_emit(cmd_buffer->cs, ps->rsrc2);
+ if (ps->info.fs.early_fragment_test || !ps->info.fs.writes_memory)
+ z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
+ else
+ z_order = V_02880C_LATE_Z;
+
+
radeon_set_context_reg(cmd_buffer->cs, R_02880C_DB_SHADER_CONTROL,
- pipeline->graphics.db_shader_control);
+ S_02880C_Z_EXPORT_ENABLE(ps->info.fs.writes_z) |
+ S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.fs.writes_stencil) |
+ S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) |
+ S_02880C_Z_ORDER(z_order) |
+ S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) |
+ S_02880C_EXEC_ON_HIER_FAIL(ps->info.fs.writes_memory) |
+ S_02880C_EXEC_ON_NOOP(ps->info.fs.writes_memory));
radeon_set_context_reg(cmd_buffer->cs, R_0286CC_SPI_PS_INPUT_ENA,
ps->config.spi_ps_input_ena);
@@ -799,43 +463,51 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
radeon_set_context_reg(cmd_buffer->cs, R_0286D0_SPI_PS_INPUT_ADDR,
ps->config.spi_ps_input_addr);
- if (ps->info.fs.force_persample)
- spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
-
+ spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
radeon_set_context_reg(cmd_buffer->cs, R_0286D8_SPI_PS_IN_CONTROL,
S_0286D8_NUM_INTERP(ps->info.fs.num_interp));
radeon_set_context_reg(cmd_buffer->cs, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
radeon_set_context_reg(cmd_buffer->cs, R_028710_SPI_SHADER_Z_FORMAT,
- pipeline->graphics.shader_z_format);
+ ps->info.fs.writes_stencil ? V_028710_SPI_SHADER_32_GR :
+ ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R :
+ V_028710_SPI_SHADER_ZERO);
radeon_set_context_reg(cmd_buffer->cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format);
radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
radeon_set_context_reg(cmd_buffer->cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
- if (pipeline->graphics.ps_input_cntl_num) {
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0, pipeline->graphics.ps_input_cntl_num);
- for (unsigned i = 0; i < pipeline->graphics.ps_input_cntl_num; i++) {
- radeon_emit(cmd_buffer->cs, pipeline->graphics.ps_input_cntl[i]);
- }
+ if (ps->info.fs.has_pcoord) {
+ unsigned val;
+ val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
+ radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
+ ps_offset = 1;
}
-}
-static void polaris_set_vgt_vertex_reuse(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
-{
- uint32_t vtx_reuse_depth = 30;
- if (cmd_buffer->device->physical_device->rad_info.family < CHIP_POLARIS10)
- return;
+ for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
+ unsigned vs_offset, flat_shade;
+ unsigned val;
+
+ if (!(ps->info.fs.input_mask & (1u << i)))
+ continue;
+
+
+ if (!(vs->info.vs.export_mask & (1u << i))) {
+ radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset,
+ S_028644_OFFSET(0x20));
+ ++ps_offset;
+ continue;
+ }
+
+ vs_offset = util_bitcount(vs->info.vs.export_mask & ((1u << i) - 1));
+ flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
- if (pipeline->shaders[MESA_SHADER_TESS_EVAL]) {
- if (pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.spacing == TESS_SPACING_FRACTIONAL_ODD)
- vtx_reuse_depth = 14;
+ val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
+ radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
+ ++ps_offset;
}
- radeon_set_context_reg(cmd_buffer->cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
- vtx_reuse_depth);
}
static void
@@ -850,23 +522,11 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer,
radv_emit_graphics_raster_state(cmd_buffer, pipeline);
radv_update_multisample_state(cmd_buffer, pipeline);
radv_emit_vertex_shader(cmd_buffer, pipeline);
- radv_emit_tess_shaders(cmd_buffer, pipeline);
- radv_emit_geometry_shader(cmd_buffer, pipeline);
radv_emit_fragment_shader(cmd_buffer, pipeline);
- polaris_set_vgt_vertex_reuse(cmd_buffer, pipeline);
-
- cmd_buffer->scratch_size_needed =
- MAX2(cmd_buffer->scratch_size_needed,
- pipeline->max_waves * pipeline->scratch_bytes_per_wave);
- radeon_set_context_reg(cmd_buffer->cs, R_0286E8_SPI_TMPRING_SIZE,
- S_0286E8_WAVES(pipeline->max_waves) |
- S_0286E8_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10));
+ radeon_set_context_reg(cmd_buffer->cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN,
+ pipeline->graphics.prim_restart_enable);
- if (!cmd_buffer->state.emitted_pipeline ||
- cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband !=
- pipeline->graphics.can_use_guardband)
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
cmd_buffer->state.emitted_pipeline = pipeline;
}
@@ -882,9 +542,7 @@ radv_emit_scissor(struct radv_cmd_buffer *cmd_buffer)
{
uint32_t count = cmd_buffer->state.dynamic.scissor.count;
si_write_scissors(cmd_buffer->cs, 0, count,
- cmd_buffer->state.dynamic.scissor.scissors,
- cmd_buffer->state.dynamic.viewport.viewports,
- cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband);
+ cmd_buffer->state.dynamic.scissor.scissors);
radeon_set_context_reg(cmd_buffer->cs, R_028A48_PA_SC_MODE_CNTL_0,
cmd_buffer->state.pipeline->graphics.ms.pa_sc_mode_cntl_0 | S_028A48_VPORT_SCISSOR_ENABLE(count ? 1 : 0));
}
@@ -894,7 +552,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
int index,
struct radv_color_buffer_info *cb)
{
- bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= VI;
+ bool is_vi = cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= VI;
radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
radeon_emit(cmd_buffer->cs, cb->cb_color_base);
radeon_emit(cmd_buffer->cs, cb->cb_color_pitch);
@@ -986,7 +644,7 @@ radv_set_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer,
va += image->offset + image->clear_value_offset;
unsigned reg_offset = 0, reg_count = 0;
- if (!image->surface.htile_size || !aspects)
+ if (!image->htile.size || !aspects)
return;
if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
@@ -1025,7 +683,7 @@ radv_load_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer,
uint64_t va = cmd_buffer->device->ws->buffer_get_va(image->bo);
va += image->offset + image->clear_value_offset;
- if (!image->surface.htile_size)
+ if (!image->htile.size)
return;
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, image->bo, 8);
@@ -1160,13 +818,13 @@ void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer)
uint32_t db_count_control;
if(!cmd_buffer->state.active_occlusion_queries) {
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+ if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
db_count_control = 0;
} else {
db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
}
} else {
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+ if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) |
S_028004_SAMPLE_RATE(0) | /* TODO: set this to the number of samples of the current framebuffer */
S_028004_ZPASS_ENABLE(1) |
@@ -1186,15 +844,6 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- if (G_028810_DX_RASTERIZATION_KILL(cmd_buffer->state.pipeline->graphics.raster.pa_cl_clip_cntl))
- return;
-
- if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
- radv_emit_viewport(cmd_buffer);
-
- if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
- radv_emit_scissor(cmd_buffer);
-
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
unsigned width = cmd_buffer->state.dynamic.line_width * 8;
radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL,
@@ -1246,118 +895,9 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
}
static void
-emit_stage_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline,
- int idx,
- uint64_t va,
- gl_shader_stage stage)
-{
- struct ac_userdata_info *desc_set_loc = &pipeline->shaders[stage]->info.user_sgprs_locs.descriptor_sets[idx];
- uint32_t base_reg = shader_stage_to_user_data_0(stage, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
-
- if (desc_set_loc->sgpr_idx == -1)
- return;
-
- assert(!desc_set_loc->indirect);
- assert(desc_set_loc->num_sgprs == 2);
- radeon_set_sh_reg_seq(cmd_buffer->cs,
- base_reg + desc_set_loc->sgpr_idx * 4, 2);
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
-}
-
-static void
-radv_emit_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer,
- VkShaderStageFlags stages,
- struct radv_descriptor_set *set,
- unsigned idx)
-{
- if (cmd_buffer->state.pipeline) {
- if (stages & VK_SHADER_STAGE_FRAGMENT_BIT)
- emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline,
- idx, set->va,
- MESA_SHADER_FRAGMENT);
-
- if (stages & VK_SHADER_STAGE_VERTEX_BIT)
- emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline,
- idx, set->va,
- MESA_SHADER_VERTEX);
-
- if ((stages & VK_SHADER_STAGE_GEOMETRY_BIT) && radv_pipeline_has_gs(cmd_buffer->state.pipeline))
- emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline,
- idx, set->va,
- MESA_SHADER_GEOMETRY);
-
- if ((stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) && radv_pipeline_has_tess(cmd_buffer->state.pipeline))
- emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline,
- idx, set->va,
- MESA_SHADER_TESS_CTRL);
-
- if ((stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) && radv_pipeline_has_tess(cmd_buffer->state.pipeline))
- emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline,
- idx, set->va,
- MESA_SHADER_TESS_EVAL);
- }
-
- if (cmd_buffer->state.compute_pipeline && (stages & VK_SHADER_STAGE_COMPUTE_BIT))
- emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.compute_pipeline,
- idx, set->va,
- MESA_SHADER_COMPUTE);
-}
-
-static void
-radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer)
-{
- struct radv_descriptor_set *set = &cmd_buffer->push_descriptors.set;
- uint32_t *ptr = NULL;
- unsigned bo_offset;
-
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, set->size, 32,
- &bo_offset,
- (void**) &ptr))
- return;
-
- set->va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
- set->va += bo_offset;
-
- memcpy(ptr, set->mapped_ptr, set->size);
-}
-
-static void
-radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
- VkShaderStageFlags stages)
-{
- unsigned i;
- if (!cmd_buffer->state.descriptors_dirty)
- return;
-
- if (cmd_buffer->state.push_descriptors_dirty)
- radv_flush_push_descriptors(cmd_buffer);
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs,
- MAX_SETS * MESA_SHADER_STAGES * 4);
-
- for (i = 0; i < MAX_SETS; i++) {
- if (!(cmd_buffer->state.descriptors_dirty & (1 << i)))
- continue;
- struct radv_descriptor_set *set = cmd_buffer->state.descriptors[i];
- if (!set)
- continue;
-
- radv_emit_descriptor_set_userdata(cmd_buffer, stages, set, i);
- }
- cmd_buffer->state.descriptors_dirty = 0;
- cmd_buffer->state.push_descriptors_dirty = false;
- assert(cmd_buffer->cs->cdw <= cdw_max);
-}
-
-static void
radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline,
- VkShaderStageFlags stages)
-{
- struct radv_pipeline_layout *layout = pipeline->layout;
+ struct radv_pipeline_layout *layout,
+ VkShaderStageFlags stages) {
unsigned offset;
void *ptr;
uint64_t va;
@@ -1366,10 +906,9 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
if (!stages || !layout || (!layout->push_constant_size && !layout->dynamic_offset_count))
return;
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
- 16 * layout->dynamic_offset_count,
- 256, &offset, &ptr))
- return;
+ radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
+ 16 * layout->dynamic_offset_count,
+ 256, &offset, &ptr);
memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
memcpy((char*)ptr + layout->push_constant_size, cmd_buffer->dynamic_buffers,
@@ -1378,70 +917,40 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
va += offset;
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, MESA_SHADER_STAGES * 4);
- if (stages & VK_SHADER_STAGE_VERTEX_BIT)
- radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_VERTEX,
- AC_UD_PUSH_CONSTANTS, va);
-
- if (stages & VK_SHADER_STAGE_FRAGMENT_BIT)
- radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_FRAGMENT,
- AC_UD_PUSH_CONSTANTS, va);
-
- if ((stages & VK_SHADER_STAGE_GEOMETRY_BIT) && radv_pipeline_has_gs(pipeline))
- radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_GEOMETRY,
- AC_UD_PUSH_CONSTANTS, va);
-
- if ((stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) && radv_pipeline_has_tess(pipeline))
- radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_TESS_CTRL,
- AC_UD_PUSH_CONSTANTS, va);
-
- if ((stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) && radv_pipeline_has_tess(pipeline))
- radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_TESS_EVAL,
- AC_UD_PUSH_CONSTANTS, va);
-
- if (stages & VK_SHADER_STAGE_COMPUTE_BIT)
- radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_COMPUTE,
- AC_UD_PUSH_CONSTANTS, va);
-
- cmd_buffer->push_constant_stages &= ~stages;
- assert(cmd_buffer->cs->cdw <= cdw_max);
-}
-
-static void radv_emit_primitive_reset_state(struct radv_cmd_buffer *cmd_buffer,
- bool indexed_draw)
-{
- int32_t primitive_reset_en = indexed_draw && cmd_buffer->state.pipeline->graphics.prim_restart_enable;
-
- if (primitive_reset_en != cmd_buffer->state.last_primitive_reset_en) {
- cmd_buffer->state.last_primitive_reset_en = primitive_reset_en;
- radeon_set_context_reg(cmd_buffer->cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN,
- primitive_reset_en);
+ if (stages & VK_SHADER_STAGE_VERTEX_BIT) {
+ radeon_set_sh_reg_seq(cmd_buffer->cs,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_PUSH_CONST_DYN * 4, 2);
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
}
- if (primitive_reset_en) {
- uint32_t primitive_reset_index = cmd_buffer->state.index_type ? 0xffffffffu : 0xffffu;
+ if (stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
+ radeon_set_sh_reg_seq(cmd_buffer->cs,
+ R_00B030_SPI_SHADER_USER_DATA_PS_0 + AC_USERDATA_PUSH_CONST_DYN * 4, 2);
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
+ }
- if (primitive_reset_index != cmd_buffer->state.last_primitive_reset_index) {
- cmd_buffer->state.last_primitive_reset_index = primitive_reset_index;
- radeon_set_context_reg(cmd_buffer->cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
- primitive_reset_index);
- }
+ if (stages & VK_SHADER_STAGE_COMPUTE_BIT) {
+ radeon_set_sh_reg_seq(cmd_buffer->cs,
+ R_00B900_COMPUTE_USER_DATA_0 + AC_USERDATA_PUSH_CONST_DYN * 4, 2);
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
}
+
+ cmd_buffer->push_constant_stages &= ~stages;
}
static void
-radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer,
- bool indexed_draw, bool instanced_draw,
- bool indirect_draw,
- uint32_t draw_vertex_count)
+radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
struct radv_device *device = cmd_buffer->device;
uint32_t ia_multi_vgt_param;
+ uint32_t ls_hs_config = 0;
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, 4096);
+ unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
+ 4096);
if ((cmd_buffer->state.vertex_descriptors_dirty || cmd_buffer->state.vb_dirty) &&
cmd_buffer->state.pipeline->num_vertex_attribs) {
@@ -1469,7 +978,7 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer,
va += offset + buffer->offset;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
- if (cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride)
+ if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class <= CIK && stride)
desc[2] = (buffer->size - offset - cmd_buffer->state.pipeline->va_format_size[i]) / stride + 1;
else
desc[2] = buffer->size - offset;
@@ -1478,9 +987,11 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer,
va = device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
va += vb_offset;
+ radeon_set_sh_reg_seq(cmd_buffer->cs,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_VERTEX_BUFFERS * 4, 2);
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
- radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_VERTEX,
- AC_UD_VS_VERTEX_BUFFERS, va);
}
cmd_buffer->state.vertex_descriptors_dirty = false;
@@ -1491,32 +1002,31 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer,
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_RENDER_TARGETS)
radv_emit_framebuffer_state(cmd_buffer);
- ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, draw_vertex_count);
- if (cmd_buffer->state.last_ia_multi_vgt_param != ia_multi_vgt_param) {
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
- radeon_set_context_reg_idx(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
- else
- radeon_set_context_reg(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
- cmd_buffer->state.last_ia_multi_vgt_param = ia_multi_vgt_param;
- }
+ if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
+ radv_emit_viewport(cmd_buffer);
+
+ if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR))
+ radv_emit_scissor(cmd_buffer);
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) {
- radeon_set_context_reg(cmd_buffer->cs, R_028B54_VGT_SHADER_STAGES_EN, pipeline->graphics.vgt_shader_stages_en);
+ radeon_set_context_reg(cmd_buffer->cs, R_028B54_VGT_SHADER_STAGES_EN, 0);
+ ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer);
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+ if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
+ radeon_set_context_reg_idx(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
+ radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
radeon_set_uconfig_reg_idx(cmd_buffer->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, cmd_buffer->state.pipeline->graphics.prim);
} else {
radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, cmd_buffer->state.pipeline->graphics.prim);
+ radeon_set_context_reg(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
+ radeon_set_context_reg(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
}
radeon_set_context_reg(cmd_buffer->cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, cmd_buffer->state.pipeline->graphics.gs_out);
}
radv_cmd_buffer_flush_dynamic_state(cmd_buffer);
- radv_emit_primitive_reset_state(cmd_buffer, indexed_draw);
-
- radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
- radv_flush_constants(cmd_buffer, cmd_buffer->state.pipeline,
+ radv_flush_constants(cmd_buffer, cmd_buffer->state.pipeline->layout,
VK_SHADER_STAGE_ALL_GRAPHICS);
assert(cmd_buffer->cs->cdw <= cdw_max);
@@ -1554,86 +1064,11 @@ static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer,
}
}
-static enum radv_cmd_flush_bits
-radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
- VkAccessFlags src_flags)
-{
- enum radv_cmd_flush_bits flush_bits = 0;
- uint32_t b;
- for_each_bit(b, src_flags) {
- switch ((VkAccessFlagBits)(1 << b)) {
- case VK_ACCESS_SHADER_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
- break;
- case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- break;
- case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- break;
- case VK_ACCESS_TRANSFER_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
- RADV_CMD_FLAG_INV_GLOBAL_L2;
- break;
- default:
- break;
- }
- }
- return flush_bits;
-}
-
-static enum radv_cmd_flush_bits
-radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
- VkAccessFlags dst_flags,
- struct radv_image *image)
-{
- enum radv_cmd_flush_bits flush_bits = 0;
- uint32_t b;
- for_each_bit(b, dst_flags) {
- switch ((VkAccessFlagBits)(1 << b)) {
- case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
- case VK_ACCESS_INDEX_READ_BIT:
- case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
- break;
- case VK_ACCESS_UNIFORM_READ_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
- break;
- case VK_ACCESS_SHADER_READ_BIT:
- case VK_ACCESS_TRANSFER_READ_BIT:
- case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_GLOBAL_L2;
- break;
- case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
- /* TODO: change to image && when the image gets passed
- * through from the subpass. */
- if (!image || (image->usage & VK_IMAGE_USAGE_STORAGE_BIT))
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- break;
- case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT:
- if (!image || (image->usage & VK_IMAGE_USAGE_STORAGE_BIT))
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- break;
- default:
- break;
- }
- }
- return flush_bits;
-}
-
static void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_subpass_barrier *barrier)
{
- cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_access_mask);
radv_stage_flush(cmd_buffer, barrier->src_stage_mask);
- cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask,
- NULL);
+
+ /* TODO: actual cache flushes */
}
static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer,
@@ -1651,7 +1086,7 @@ static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buf
radv_handle_image_transition(cmd_buffer,
view->image,
cmd_buffer->state.attachments[idx].current_layout,
- att.layout, 0, 0, &range,
+ att.layout, range,
cmd_buffer->state.attachments[idx].pending_clear_aspects);
cmd_buffer->state.attachments[idx].current_layout = att.layout;
@@ -1751,27 +1186,9 @@ VkResult radv_AllocateCommandBuffers(
VkResult result = VK_SUCCESS;
uint32_t i;
- memset(pCommandBuffers, 0,
- sizeof(*pCommandBuffers)*pAllocateInfo->commandBufferCount);
-
for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
-
- if (!list_empty(&pool->free_cmd_buffers)) {
- struct radv_cmd_buffer *cmd_buffer = list_first_entry(&pool->free_cmd_buffers, struct radv_cmd_buffer, pool_link);
-
- list_del(&cmd_buffer->pool_link);
- list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
-
- radv_reset_cmd_buffer(cmd_buffer);
- cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
- cmd_buffer->level = pAllocateInfo->level;
-
- pCommandBuffers[i] = radv_cmd_buffer_to_handle(cmd_buffer);
- result = VK_SUCCESS;
- } else {
- result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level,
- &pCommandBuffers[i]);
- }
+ result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level,
+ &pCommandBuffers[i]);
if (result != VK_SUCCESS)
break;
}
@@ -1783,6 +1200,24 @@ VkResult radv_AllocateCommandBuffers(
return result;
}
+static void
+radv_cmd_buffer_destroy(struct radv_cmd_buffer *cmd_buffer)
+{
+ list_del(&cmd_buffer->pool_link);
+
+ list_for_each_entry_safe(struct radv_cmd_buffer_upload, up,
+ &cmd_buffer->upload.list, list) {
+ cmd_buffer->device->ws->buffer_destroy(up->upload_bo);
+ list_del(&up->list);
+ free(up);
+ }
+
+ if (cmd_buffer->upload.upload_bo)
+ cmd_buffer->device->ws->buffer_destroy(cmd_buffer->upload.upload_bo);
+ cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs);
+ vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
+}
+
void radv_FreeCommandBuffers(
VkDevice device,
VkCommandPool commandPool,
@@ -1792,15 +1227,29 @@ void radv_FreeCommandBuffers(
for (uint32_t i = 0; i < commandBufferCount; i++) {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
- if (cmd_buffer) {
- if (cmd_buffer->pool) {
- list_del(&cmd_buffer->pool_link);
- list_addtail(&cmd_buffer->pool_link, &cmd_buffer->pool->free_cmd_buffers);
- } else
- radv_cmd_buffer_destroy(cmd_buffer);
+ if (cmd_buffer)
+ radv_cmd_buffer_destroy(cmd_buffer);
+ }
+}
- }
+static void radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
+{
+
+ cmd_buffer->device->ws->cs_reset(cmd_buffer->cs);
+
+ list_for_each_entry_safe(struct radv_cmd_buffer_upload, up,
+ &cmd_buffer->upload.list, list) {
+ cmd_buffer->device->ws->buffer_destroy(up->upload_bo);
+ list_del(&up->list);
+ free(up);
}
+
+ if (cmd_buffer->upload.upload_bo)
+ cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
+ cmd_buffer->upload.upload_bo, 8);
+ cmd_buffer->upload.offset = 0;
+
+ cmd_buffer->record_fail = false;
}
VkResult radv_ResetCommandBuffer(
@@ -1812,20 +1261,6 @@ VkResult radv_ResetCommandBuffer(
return VK_SUCCESS;
}
-static void emit_gfx_buffer_state(struct radv_cmd_buffer *cmd_buffer)
-{
- struct radv_device *device = cmd_buffer->device;
- if (device->gfx_init) {
- uint64_t va = device->ws->buffer_get_va(device->gfx_init);
- device->ws->cs_add_buffer(cmd_buffer->cs, device->gfx_init, 8);
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, (va >> 32) & 0xffff);
- radeon_emit(cmd_buffer->cs, device->gfx_init_size_dw & 0xffff);
- } else
- si_init_config(cmd_buffer);
-}
-
VkResult radv_BeginCommandBuffer(
VkCommandBuffer commandBuffer,
const VkCommandBufferBeginInfo *pBeginInfo)
@@ -1834,22 +1269,20 @@ VkResult radv_BeginCommandBuffer(
radv_reset_cmd_buffer(cmd_buffer);
memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
- cmd_buffer->state.last_primitive_reset_en = -1;
/* setup initial configuration into command buffer */
if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
- switch (cmd_buffer->queue_family_index) {
- case RADV_QUEUE_GENERAL:
- emit_gfx_buffer_state(cmd_buffer);
- radv_set_db_count_control(cmd_buffer);
- break;
- case RADV_QUEUE_COMPUTE:
- si_init_compute(cmd_buffer);
- break;
- case RADV_QUEUE_TRANSFER:
- default:
- break;
- }
+ /* Flush read caches at the beginning of CS not flushed by the kernel. */
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_ICACHE |
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_INV_VMEM_L1 |
+ RADV_CMD_FLAG_INV_SMEM_L1 |
+ RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER |
+ RADV_CMD_FLAG_INV_GLOBAL_L2;
+ si_init_config(&cmd_buffer->device->instance->physicalDevice, cmd_buffer);
+ radv_set_db_count_control(cmd_buffer);
+ si_emit_cache_flush(cmd_buffer);
}
if (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
@@ -1863,7 +1296,6 @@ VkResult radv_BeginCommandBuffer(
radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false);
}
- radv_cmd_buffer_trace_emit(cmd_buffer);
return VK_SUCCESS;
}
@@ -1910,10 +1342,8 @@ void radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
{
struct radeon_winsys *ws = cmd_buffer->device->ws;
- assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
-
cmd_buffer->state.descriptors[idx] = set;
- cmd_buffer->state.descriptors_dirty |= (1 << idx);
+
if (!set)
return;
@@ -1921,6 +1351,21 @@ void radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
if (set->descriptors[j])
ws->cs_add_buffer(cmd_buffer->cs, set->descriptors[j], 7);
+ radeon_set_sh_reg_seq(cmd_buffer->cs,
+ R_00B030_SPI_SHADER_USER_DATA_PS_0 + 8 * idx, 2);
+ radeon_emit(cmd_buffer->cs, set->va);
+ radeon_emit(cmd_buffer->cs, set->va >> 32);
+
+ radeon_set_sh_reg_seq(cmd_buffer->cs,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0 + 8 * idx, 2);
+ radeon_emit(cmd_buffer->cs, set->va);
+ radeon_emit(cmd_buffer->cs, set->va >> 32);
+
+ radeon_set_sh_reg_seq(cmd_buffer->cs,
+ R_00B900_COMPUTE_USER_DATA_0 + 8 * idx, 2);
+ radeon_emit(cmd_buffer->cs, set->va);
+ radeon_emit(cmd_buffer->cs, set->va >> 32);
+
if(set->bo)
ws->cs_add_buffer(cmd_buffer->cs, set->bo, 8);
}
@@ -1939,13 +1384,16 @@ void radv_CmdBindDescriptorSets(
RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
unsigned dyn_idx = 0;
+ unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
+ MAX_SETS * 4 * 6);
+
for (unsigned i = 0; i < descriptorSetCount; ++i) {
unsigned idx = i + firstSet;
RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);
radv_bind_descriptor_set(cmd_buffer, set, idx);
for(unsigned j = 0; j < set->layout->dynamic_offset_count; ++j, ++dyn_idx) {
- unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start;
+ unsigned idx = j + layout->set[i].dynamic_offset_start;
uint32_t *dst = cmd_buffer->dynamic_buffers + idx * 4;
assert(dyn_idx < dynamicOffsetCount);
@@ -1964,116 +1412,8 @@ void radv_CmdBindDescriptorSets(
set->layout->dynamic_shader_stages;
}
}
-}
-
-static bool radv_init_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
- struct radv_descriptor_set *set,
- struct radv_descriptor_set_layout *layout)
-{
- set->size = layout->size;
- set->layout = layout;
-
- if (cmd_buffer->push_descriptors.capacity < set->size) {
- size_t new_size = MAX2(set->size, 1024);
- new_size = MAX2(new_size, 2 * cmd_buffer->push_descriptors.capacity);
- new_size = MIN2(new_size, 96 * MAX_PUSH_DESCRIPTORS);
-
- free(set->mapped_ptr);
- set->mapped_ptr = malloc(new_size);
-
- if (!set->mapped_ptr) {
- cmd_buffer->push_descriptors.capacity = 0;
- cmd_buffer->record_fail = true;
- return false;
- }
-
- cmd_buffer->push_descriptors.capacity = new_size;
- }
-
- return true;
-}
-
-void radv_meta_push_descriptor_set(
- struct radv_cmd_buffer* cmd_buffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipelineLayout _layout,
- uint32_t set,
- uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet* pDescriptorWrites)
-{
- RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
- struct radv_descriptor_set *push_set = &cmd_buffer->meta_push_descriptors;
- unsigned bo_offset;
-
- assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
-
- push_set->size = layout->set[set].layout->size;
- push_set->layout = layout->set[set].layout;
-
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->size, 32,
- &bo_offset,
- (void**) &push_set->mapped_ptr))
- return;
-
- push_set->va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
- push_set->va += bo_offset;
-
- radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
- radv_descriptor_set_to_handle(push_set),
- descriptorWriteCount, pDescriptorWrites, 0, NULL);
-
- cmd_buffer->state.descriptors[set] = push_set;
- cmd_buffer->state.descriptors_dirty |= (1 << set);
-}
-
-void radv_CmdPushDescriptorSetKHR(
- VkCommandBuffer commandBuffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipelineLayout _layout,
- uint32_t set,
- uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet* pDescriptorWrites)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
- struct radv_descriptor_set *push_set = &cmd_buffer->push_descriptors.set;
-
- assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
-
- if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout))
- return;
-
- radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
- radv_descriptor_set_to_handle(push_set),
- descriptorWriteCount, pDescriptorWrites, 0, NULL);
-
- cmd_buffer->state.descriptors[set] = push_set;
- cmd_buffer->state.descriptors_dirty |= (1 << set);
- cmd_buffer->state.push_descriptors_dirty = true;
-}
-
-void radv_CmdPushDescriptorSetWithTemplateKHR(
- VkCommandBuffer commandBuffer,
- VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate,
- VkPipelineLayout _layout,
- uint32_t set,
- const void* pData)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
- struct radv_descriptor_set *push_set = &cmd_buffer->push_descriptors.set;
-
- assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
-
- if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout))
- return;
-
- radv_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set,
- descriptorUpdateTemplate, pData);
- cmd_buffer->state.descriptors[set] = push_set;
- cmd_buffer->state.descriptors_dirty |= (1 << set);
- cmd_buffer->state.push_descriptors_dirty = true;
+ assert(cmd_buffer->cs->cdw <= cdw_max);
}
void radv_CmdPushConstants(VkCommandBuffer commandBuffer,
@@ -2093,9 +1433,7 @@ VkResult radv_EndCommandBuffer(
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER)
- si_emit_cache_flush(cmd_buffer);
-
+ si_emit_cache_flush(cmd_buffer);
if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs) ||
cmd_buffer->record_fail)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
@@ -2120,8 +1458,7 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
ws->cs_add_buffer(cmd_buffer->cs, compute_shader->bo, 8);
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, 16);
+ unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 16);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B830_COMPUTE_PGM_LO, 2);
radeon_emit(cmd_buffer->cs, va >> 8);
@@ -2131,15 +1468,9 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
radeon_emit(cmd_buffer->cs, compute_shader->rsrc1);
radeon_emit(cmd_buffer->cs, compute_shader->rsrc2);
-
- cmd_buffer->compute_scratch_size_needed =
- MAX2(cmd_buffer->compute_scratch_size_needed,
- pipeline->max_waves * pipeline->scratch_bytes_per_wave);
-
/* change these once we have scratch support */
radeon_set_sh_reg(cmd_buffer->cs, R_00B860_COMPUTE_TMPRING_SIZE,
- S_00B860_WAVES(pipeline->max_waves) |
- S_00B860_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10));
+ S_00B860_WAVES(32) | S_00B860_WAVESIZE(0));
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
radeon_emit(cmd_buffer->cs,
@@ -2152,13 +1483,6 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
assert(cmd_buffer->cs->cdw <= cdw_max);
}
-static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer)
-{
- for (unsigned i = 0; i < MAX_SETS; i++) {
- if (cmd_buffer->state.descriptors[i])
- cmd_buffer->state.descriptors_dirty |= (1u << i);
- }
-}
void radv_CmdBindPipeline(
VkCommandBuffer commandBuffer,
@@ -2168,8 +1492,6 @@ void radv_CmdBindPipeline(
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
- radv_mark_descriptor_sets_dirty(cmd_buffer);
-
switch (pipelineBindPoint) {
case VK_PIPELINE_BIND_POINT_COMPUTE:
cmd_buffer->state.compute_pipeline = pipeline;
@@ -2177,9 +1499,6 @@ void radv_CmdBindPipeline(
break;
case VK_PIPELINE_BIND_POINT_GRAPHICS:
cmd_buffer->state.pipeline = pipeline;
- if (!pipeline)
- break;
-
cmd_buffer->state.vertex_descriptors_dirty = true;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
cmd_buffer->push_constant_stages |= pipeline->active_stages;
@@ -2189,23 +1508,6 @@ void radv_CmdBindPipeline(
radv_dynamic_state_copy(&cmd_buffer->state.dynamic,
&pipeline->dynamic_state,
pipeline->dynamic_state_mask);
-
- if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed)
- cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size;
- if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed)
- cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size;
-
- if (radv_pipeline_has_tess(pipeline))
- cmd_buffer->tess_rings_needed = true;
-
- if (radv_pipeline_has_gs(pipeline)) {
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
- AC_UD_SCRATCH_RING_OFFSETS);
- if (cmd_buffer->ring_offsets_idx == -1)
- cmd_buffer->ring_offsets_idx = loc->sgpr_idx;
- else if (loc->sgpr_idx != -1)
- assert(loc->sgpr_idx == cmd_buffer->ring_offsets_idx);
- }
break;
default:
assert(!"invalid bind point");
@@ -2342,6 +1644,7 @@ void radv_CmdSetStencilReference(
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
}
+
void radv_CmdExecuteCommands(
VkCommandBuffer commandBuffer,
uint32_t commandBufferCount,
@@ -2349,44 +1652,17 @@ void radv_CmdExecuteCommands(
{
RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer);
- /* Emit pending flushes on primary prior to executing secondary */
- si_emit_cache_flush(primary);
-
for (uint32_t i = 0; i < commandBufferCount; i++) {
RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);
- primary->scratch_size_needed = MAX2(primary->scratch_size_needed,
- secondary->scratch_size_needed);
- primary->compute_scratch_size_needed = MAX2(primary->compute_scratch_size_needed,
- secondary->compute_scratch_size_needed);
-
- if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed)
- primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
- if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed)
- primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
- if (secondary->tess_rings_needed)
- primary->tess_rings_needed = true;
- if (secondary->sample_positions_needed)
- primary->sample_positions_needed = true;
-
- if (secondary->ring_offsets_idx != -1) {
- if (primary->ring_offsets_idx == -1)
- primary->ring_offsets_idx = secondary->ring_offsets_idx;
- else
- assert(secondary->ring_offsets_idx == primary->ring_offsets_idx);
- }
primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs);
}
/* if we execute secondary we need to re-emit out pipelines */
if (commandBufferCount) {
primary->state.emitted_pipeline = NULL;
- primary->state.emitted_compute_pipeline = NULL;
primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
primary->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ALL;
- primary->state.last_primitive_reset_en = -1;
- primary->state.last_primitive_reset_index = 0;
- radv_mark_descriptor_sets_dirty(primary);
}
}
@@ -2410,9 +1686,6 @@ VkResult radv_CreateCommandPool(
pool->alloc = device->alloc;
list_inithead(&pool->cmd_buffers);
- list_inithead(&pool->free_cmd_buffers);
-
- pool->queue_family_index = pCreateInfo->queueFamilyIndex;
*pCmdPool = radv_cmd_pool_to_handle(pool);
@@ -2436,11 +1709,6 @@ void radv_DestroyCommandPool(
radv_cmd_buffer_destroy(cmd_buffer);
}
- list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer,
- &pool->free_cmd_buffers, pool_link) {
- radv_cmd_buffer_destroy(cmd_buffer);
- }
-
vk_free2(&device->alloc, pAllocator, pool);
}
@@ -2459,22 +1727,6 @@ VkResult radv_ResetCommandPool(
return VK_SUCCESS;
}
-void radv_TrimCommandPoolKHR(
- VkDevice device,
- VkCommandPool commandPool,
- VkCommandPoolTrimFlagsKHR flags)
-{
- RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
-
- if (!pool)
- return;
-
- list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer,
- &pool->free_cmd_buffers, pool_link) {
- radv_cmd_buffer_destroy(cmd_buffer);
- }
-}
-
void radv_CmdBeginRenderPass(
VkCommandBuffer commandBuffer,
const VkRenderPassBeginInfo* pRenderPassBegin,
@@ -2484,14 +1736,16 @@ void radv_CmdBeginRenderPass(
RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBegin->renderPass);
RADV_FROM_HANDLE(radv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, 2048);
+ unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
+ 2048);
cmd_buffer->state.framebuffer = framebuffer;
cmd_buffer->state.pass = pass;
cmd_buffer->state.render_area = pRenderPassBegin->renderArea;
radv_cmd_state_setup_attachments(cmd_buffer, pass, pRenderPassBegin);
+ si_emit_cache_flush(cmd_buffer);
+
radv_cmd_buffer_set_subpass(cmd_buffer, pass->subpasses, true);
assert(cmd_buffer->cs->cdw <= cdw_max);
@@ -2504,6 +1758,7 @@ void radv_CmdNextSubpass(
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ si_emit_cache_flush(cmd_buffer);
radv_cmd_buffer_resolve_subpass(cmd_buffer);
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
@@ -2521,21 +1776,13 @@ void radv_CmdDraw(
uint32_t firstInstance)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ radv_cmd_buffer_flush_state(cmd_buffer);
- radv_cmd_buffer_flush_state(cmd_buffer, false, (instanceCount > 1), false, vertexCount);
+ unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10);
-
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
- AC_UD_VS_BASE_VERTEX_START_INSTANCE);
- if (loc->sgpr_idx != -1) {
- uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(cmd_buffer->state.pipeline),
- radv_pipeline_has_tess(cmd_buffer->state.pipeline));
- radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 3);
- radeon_emit(cmd_buffer->cs, firstVertex);
- radeon_emit(cmd_buffer->cs, firstInstance);
- radeon_emit(cmd_buffer->cs, 0);
- }
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_BASE_VERTEX * 4, 2);
+ radeon_emit(cmd_buffer->cs, firstVertex);
+ radeon_emit(cmd_buffer->cs, firstInstance);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
radeon_emit(cmd_buffer->cs, instanceCount);
@@ -2545,8 +1792,18 @@ void radv_CmdDraw(
S_0287F0_USE_OPAQUE(0));
assert(cmd_buffer->cs->cdw <= cdw_max);
+}
- radv_cmd_buffer_trace_emit(cmd_buffer);
+static void radv_emit_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer)
+{
+ uint32_t primitive_reset_index = cmd_buffer->state.last_primitive_reset_index ? 0xffffffffu : 0xffffu;
+
+ if (cmd_buffer->state.pipeline->graphics.prim_restart_enable &&
+ primitive_reset_index != cmd_buffer->state.last_primitive_reset_index) {
+ cmd_buffer->state.last_primitive_reset_index = primitive_reset_index;
+ radeon_set_context_reg(cmd_buffer->cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
+ primitive_reset_index);
+ }
}
void radv_CmdDrawIndexed(
@@ -2562,23 +1819,17 @@ void radv_CmdDrawIndexed(
uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size;
uint64_t index_va;
- radv_cmd_buffer_flush_state(cmd_buffer, true, (instanceCount > 1), false, indexCount);
+ radv_cmd_buffer_flush_state(cmd_buffer);
+ radv_emit_primitive_reset_index(cmd_buffer);
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15);
+ unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 14);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
radeon_emit(cmd_buffer->cs, cmd_buffer->state.index_type);
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
- AC_UD_VS_BASE_VERTEX_START_INSTANCE);
- if (loc->sgpr_idx != -1) {
- uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(cmd_buffer->state.pipeline),
- radv_pipeline_has_tess(cmd_buffer->state.pipeline));
- radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 3);
- radeon_emit(cmd_buffer->cs, vertexOffset);
- radeon_emit(cmd_buffer->cs, firstInstance);
- radeon_emit(cmd_buffer->cs, 0);
- }
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_BASE_VERTEX * 4, 2);
+ radeon_emit(cmd_buffer->cs, vertexOffset);
+ radeon_emit(cmd_buffer->cs, firstInstance);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
radeon_emit(cmd_buffer->cs, instanceCount);
@@ -2592,43 +1843,28 @@ void radv_CmdDrawIndexed(
radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA);
assert(cmd_buffer->cs->cdw <= cdw_max);
- radv_cmd_buffer_trace_emit(cmd_buffer);
}
static void
radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
VkBuffer _buffer,
VkDeviceSize offset,
- VkBuffer _count_buffer,
- VkDeviceSize count_offset,
uint32_t draw_count,
uint32_t stride,
bool indexed)
{
RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- RADV_FROM_HANDLE(radv_buffer, count_buffer, _count_buffer);
struct radeon_winsys_cs *cs = cmd_buffer->cs;
unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA
: V_0287F0_DI_SRC_SEL_AUTO_INDEX;
uint64_t indirect_va = cmd_buffer->device->ws->buffer_get_va(buffer->bo);
indirect_va += offset + buffer->offset;
- uint64_t count_va = 0;
-
- if (count_buffer) {
- count_va = cmd_buffer->device->ws->buffer_get_va(count_buffer->bo);
- count_va += count_offset + count_buffer->offset;
- }
if (!draw_count)
return;
cmd_buffer->device->ws->cs_add_buffer(cs, buffer->bo, 8);
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
- AC_UD_VS_BASE_VERTEX_START_INSTANCE);
- uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(cmd_buffer->state.pipeline),
- radv_pipeline_has_tess(cmd_buffer->state.pipeline));
- assert(loc->sgpr_idx != -1);
radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
radeon_emit(cs, 1);
radeon_emit(cs, indirect_va);
@@ -2638,60 +1874,51 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
PKT3_DRAW_INDIRECT_MULTI,
8, false));
radeon_emit(cs, 0);
- radeon_emit(cs, ((base_reg + loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, ((base_reg + (loc->sgpr_idx + 1) * 4) - SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, (((base_reg + (loc->sgpr_idx + 2) * 4) - SI_SH_REG_OFFSET) >> 2) |
- S_2C3_DRAW_INDEX_ENABLE(1) |
- S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
+ radeon_emit(cs, ((R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_BASE_VERTEX * 4) - SI_SH_REG_OFFSET) >> 2);
+ radeon_emit(cs, ((R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_START_INSTANCE * 4) - SI_SH_REG_OFFSET) >> 2);
+ radeon_emit(cs, 0); /* draw_index */
radeon_emit(cs, draw_count); /* count */
- radeon_emit(cs, count_va); /* count_addr */
- radeon_emit(cs, count_va >> 32);
+ radeon_emit(cs, 0); /* count_addr -- disabled */
+ radeon_emit(cs, 0);
radeon_emit(cs, stride); /* stride */
radeon_emit(cs, di_src_sel);
- radv_cmd_buffer_trace_emit(cmd_buffer);
}
-static void
-radv_cmd_draw_indirect_count(VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- VkBuffer countBuffer,
- VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount,
- uint32_t stride)
+void radv_CmdDrawIndirect(
+ VkCommandBuffer commandBuffer,
+ VkBuffer _buffer,
+ VkDeviceSize offset,
+ uint32_t drawCount,
+ uint32_t stride)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_cmd_buffer_flush_state(cmd_buffer, false, false, true, 0);
+ radv_cmd_buffer_flush_state(cmd_buffer);
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, 14);
+ unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 14);
- radv_emit_indirect_draw(cmd_buffer, buffer, offset,
- countBuffer, countBufferOffset, maxDrawCount, stride, false);
+ radv_emit_indirect_draw(cmd_buffer, _buffer, offset, drawCount, stride, false);
assert(cmd_buffer->cs->cdw <= cdw_max);
}
-static void
-radv_cmd_draw_indexed_indirect_count(
+void radv_CmdDrawIndexedIndirect(
VkCommandBuffer commandBuffer,
- VkBuffer buffer,
+ VkBuffer _buffer,
VkDeviceSize offset,
- VkBuffer countBuffer,
- VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount,
+ uint32_t drawCount,
uint32_t stride)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
int index_size = cmd_buffer->state.index_type ? 4 : 2;
uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size;
uint64_t index_va;
- radv_cmd_buffer_flush_state(cmd_buffer, true, false, true, 0);
+ radv_cmd_buffer_flush_state(cmd_buffer);
+ radv_emit_primitive_reset_index(cmd_buffer);
index_va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->state.index_buffer->bo);
index_va += cmd_buffer->state.index_buffer->offset + cmd_buffer->state.index_offset;
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 21);
+ unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 21);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
radeon_emit(cmd_buffer->cs, cmd_buffer->state.index_type);
@@ -2703,72 +1930,11 @@ radv_cmd_draw_indexed_indirect_count(
radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
radeon_emit(cmd_buffer->cs, index_max_size);
- radv_emit_indirect_draw(cmd_buffer, buffer, offset,
- countBuffer, countBufferOffset, maxDrawCount, stride, true);
+ radv_emit_indirect_draw(cmd_buffer, _buffer, offset, drawCount, stride, true);
assert(cmd_buffer->cs->cdw <= cdw_max);
}
-void radv_CmdDrawIndirect(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- uint32_t drawCount,
- uint32_t stride)
-{
- radv_cmd_draw_indirect_count(commandBuffer, buffer, offset,
- VK_NULL_HANDLE, 0, drawCount, stride);
-}
-
-void radv_CmdDrawIndexedIndirect(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- uint32_t drawCount,
- uint32_t stride)
-{
- radv_cmd_draw_indexed_indirect_count(commandBuffer, buffer, offset,
- VK_NULL_HANDLE, 0, drawCount, stride);
-}
-
-void radv_CmdDrawIndirectCountAMD(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- VkBuffer countBuffer,
- VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount,
- uint32_t stride)
-{
- radv_cmd_draw_indirect_count(commandBuffer, buffer, offset,
- countBuffer, countBufferOffset,
- maxDrawCount, stride);
-}
-
-void radv_CmdDrawIndexedIndirectCountAMD(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- VkBuffer countBuffer,
- VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount,
- uint32_t stride)
-{
- radv_cmd_draw_indexed_indirect_count(commandBuffer, buffer, offset,
- countBuffer, countBufferOffset,
- maxDrawCount, stride);
-}
-
-static void
-radv_flush_compute_state(struct radv_cmd_buffer *cmd_buffer)
-{
- radv_emit_compute_pipeline(cmd_buffer);
- radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT);
- radv_flush_constants(cmd_buffer, cmd_buffer->state.compute_pipeline,
- VK_SHADER_STAGE_COMPUTE_BIT);
- si_emit_cache_flush(cmd_buffer);
-}
-
void radv_CmdDispatch(
VkCommandBuffer commandBuffer,
uint32_t x,
@@ -2777,20 +1943,16 @@ void radv_CmdDispatch(
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_flush_compute_state(cmd_buffer);
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10);
+ radv_emit_compute_pipeline(cmd_buffer);
+ radv_flush_constants(cmd_buffer, cmd_buffer->state.compute_pipeline->layout,
+ VK_SHADER_STAGE_COMPUTE_BIT);
+ si_emit_cache_flush(cmd_buffer);
+ unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10);
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
- MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
- if (loc->sgpr_idx != -1) {
- assert(!loc->indirect);
- assert(loc->num_sgprs == 3);
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3);
- radeon_emit(cmd_buffer->cs, x);
- radeon_emit(cmd_buffer->cs, y);
- radeon_emit(cmd_buffer->cs, z);
- }
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + AC_USERDATA_CS_GRID_SIZE * 4, 3);
+ radeon_emit(cmd_buffer->cs, x);
+ radeon_emit(cmd_buffer->cs, y);
+ radeon_emit(cmd_buffer->cs, z);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
PKT3_SHADER_TYPE_S(1));
@@ -2800,7 +1962,6 @@ void radv_CmdDispatch(
radeon_emit(cmd_buffer->cs, 1);
assert(cmd_buffer->cs->cdw <= cdw_max);
- radv_cmd_buffer_trace_emit(cmd_buffer);
}
void radv_CmdDispatchIndirect(
@@ -2815,44 +1976,35 @@ void radv_CmdDispatchIndirect(
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, buffer->bo, 8);
- radv_flush_compute_state(cmd_buffer);
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 25);
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
- MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
- if (loc->sgpr_idx != -1) {
- for (unsigned i = 0; i < 3; ++i) {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG));
- radeon_emit(cmd_buffer->cs, (va + 4 * i));
- radeon_emit(cmd_buffer->cs, (va + 4 * i) >> 32);
- radeon_emit(cmd_buffer->cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4) >> 2) + i);
- radeon_emit(cmd_buffer->cs, 0);
- }
- }
+ radv_emit_compute_pipeline(cmd_buffer);
+ radv_flush_constants(cmd_buffer, cmd_buffer->state.compute_pipeline->layout,
+ VK_SHADER_STAGE_COMPUTE_BIT);
+ si_emit_cache_flush(cmd_buffer);
- if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
- radeon_emit(cmd_buffer->cs, 1);
- } else {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_BASE, 2, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cmd_buffer->cs, 1);
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
+ unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 25);
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, 0) |
- PKT3_SHADER_TYPE_S(1));
+ for (unsigned i = 0; i < 3; ++i) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+ COPY_DATA_DST_SEL(COPY_DATA_REG));
+ radeon_emit(cmd_buffer->cs, (va + 4 * i));
+ radeon_emit(cmd_buffer->cs, (va + 4 * i) >> 32);
+ radeon_emit(cmd_buffer->cs, ((R_00B900_COMPUTE_USER_DATA_0 + AC_USERDATA_CS_GRID_SIZE * 4) >> 2) + i);
radeon_emit(cmd_buffer->cs, 0);
- radeon_emit(cmd_buffer->cs, 1);
}
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_BASE, 2, 0) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cmd_buffer->cs, 1);
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
+
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, 0) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cmd_buffer->cs, 0);
+ radeon_emit(cmd_buffer->cs, 1);
+
assert(cmd_buffer->cs->cdw <= cdw_max);
- radv_cmd_buffer_trace_emit(cmd_buffer);
}
void radv_unaligned_dispatch(
@@ -2874,9 +2026,11 @@ void radv_unaligned_dispatch(
remainder[1] = y + compute_shader->info.cs.block_size[1] - align_u32_npot(y, compute_shader->info.cs.block_size[1]);
remainder[2] = z + compute_shader->info.cs.block_size[2] - align_u32_npot(z, compute_shader->info.cs.block_size[2]);
- radv_flush_compute_state(cmd_buffer);
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15);
+ radv_emit_compute_pipeline(cmd_buffer);
+ radv_flush_constants(cmd_buffer, cmd_buffer->state.compute_pipeline->layout,
+ VK_SHADER_STAGE_COMPUTE_BIT);
+ si_emit_cache_flush(cmd_buffer);
+ unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
radeon_emit(cmd_buffer->cs,
@@ -2889,14 +2043,11 @@ void radv_unaligned_dispatch(
S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2]) |
S_00B81C_NUM_THREAD_PARTIAL(remainder[2]));
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
- MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
- if (loc->sgpr_idx != -1) {
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3);
- radeon_emit(cmd_buffer->cs, blocks[0]);
- radeon_emit(cmd_buffer->cs, blocks[1]);
- radeon_emit(cmd_buffer->cs, blocks[2]);
- }
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + AC_USERDATA_CS_GRID_SIZE * 4, 3);
+ radeon_emit(cmd_buffer->cs, blocks[0]);
+ radeon_emit(cmd_buffer->cs, blocks[1]);
+ radeon_emit(cmd_buffer->cs, blocks[2]);
+
radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
PKT3_SHADER_TYPE_S(1));
radeon_emit(cmd_buffer->cs, blocks[0]);
@@ -2906,7 +2057,6 @@ void radv_unaligned_dispatch(
S_00B800_PARTIAL_TG_EN(1));
assert(cmd_buffer->cs->cdw <= cdw_max);
- radv_cmd_buffer_trace_emit(cmd_buffer);
}
void radv_CmdEndRenderPass(
@@ -2916,6 +2066,7 @@ void radv_CmdEndRenderPass(
radv_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier);
+ si_emit_cache_flush(cmd_buffer);
radv_cmd_buffer_resolve_subpass(cmd_buffer);
for (unsigned i = 0; i < cmd_buffer->state.framebuffer->attachment_count; ++i) {
@@ -2934,32 +2085,26 @@ void radv_CmdEndRenderPass(
static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range)
+ struct radv_image *image)
{
- assert(range->baseMipLevel == 0);
- assert(range->levelCount == 1 || range->levelCount == VK_REMAINING_ARRAY_LAYERS);
- unsigned layer_count = radv_get_layerCount(image, range);
- uint64_t size = image->surface.htile_slice_size * layer_count;
- uint64_t offset = image->offset + image->htile_offset +
- image->surface.htile_slice_size * range->baseArrayLayer;
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- radv_fill_buffer(cmd_buffer, image->bo, offset, size, 0xffffffff);
+ radv_fill_buffer(cmd_buffer, image->bo, image->offset + image->htile.offset,
+ image->htile.size, 0xffffffff);
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+ RADV_CMD_FLAG_INV_GLOBAL_L2;
}
static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
VkImageLayout src_layout,
VkImageLayout dst_layout,
- const VkImageSubresourceRange *range,
+ VkImageSubresourceRange range,
VkImageAspectFlags pending_clears)
{
if (dst_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL &&
@@ -2972,26 +2117,20 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe
} else if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
radv_layout_has_htile(image, dst_layout)) {
/* TODO: merge with the clear if applicable */
- radv_initialize_htile(cmd_buffer, image, range);
+ radv_initialize_htile(cmd_buffer, image);
} else if (!radv_layout_has_htile(image, src_layout) &&
radv_layout_has_htile(image, dst_layout)) {
- radv_initialize_htile(cmd_buffer, image, range);
+ radv_initialize_htile(cmd_buffer, image);
} else if ((radv_layout_has_htile(image, src_layout) &&
!radv_layout_has_htile(image, dst_layout)) ||
(radv_layout_is_htile_compressed(image, src_layout) &&
!radv_layout_is_htile_compressed(image, dst_layout))) {
- VkImageSubresourceRange local_range = *range;
- local_range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
- local_range.baseMipLevel = 0;
- local_range.levelCount = 1;
-
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- radv_decompress_depth_image_inplace(cmd_buffer, image, &local_range);
+ range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+ range.baseMipLevel = 0;
+ range.levelCount = 1;
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ radv_decompress_depth_image_inplace(cmd_buffer, image, &range);
}
}
@@ -3007,16 +2146,14 @@ void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+ RADV_CMD_FLAG_INV_GLOBAL_L2;
}
static void radv_handle_cmask_image_transition(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
VkImageLayout src_layout,
VkImageLayout dst_layout,
- unsigned src_queue_mask,
- unsigned dst_queue_mask,
- const VkImageSubresourceRange *range,
+ VkImageSubresourceRange range,
VkImageAspectFlags pending_clears)
{
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
@@ -3024,9 +2161,9 @@ static void radv_handle_cmask_image_transition(struct radv_cmd_buffer *cmd_buffe
radv_initialise_cmask(cmd_buffer, image, 0xccccccccu);
else
radv_initialise_cmask(cmd_buffer, image, 0xffffffffu);
- } else if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) &&
- !radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) {
- radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
+ } else if (radv_layout_has_cmask(image, src_layout) &&
+ !radv_layout_has_cmask(image, dst_layout)) {
+ radv_fast_clear_flush_image_inplace(cmd_buffer, image);
}
}
@@ -3044,23 +2181,21 @@ void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+ RADV_CMD_FLAG_INV_GLOBAL_L2;
}
static void radv_handle_dcc_image_transition(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
VkImageLayout src_layout,
VkImageLayout dst_layout,
- unsigned src_queue_mask,
- unsigned dst_queue_mask,
- const VkImageSubresourceRange *range,
+ VkImageSubresourceRange range,
VkImageAspectFlags pending_clears)
{
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
radv_initialize_dcc(cmd_buffer, image, 0x20202020u);
- } else if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) &&
- !radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) {
- radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
+ } else if(src_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
+ dst_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
+ radv_fast_clear_flush_image_inplace(cmd_buffer, image);
}
}
@@ -3068,46 +2203,20 @@ static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
VkImageLayout src_layout,
VkImageLayout dst_layout,
- uint32_t src_family,
- uint32_t dst_family,
- const VkImageSubresourceRange *range,
+ VkImageSubresourceRange range,
VkImageAspectFlags pending_clears)
{
- if (image->exclusive && src_family != dst_family) {
- /* This is an acquire or a release operation and there will be
- * a corresponding release/acquire. Do the transition in the
- * most flexible queue. */
-
- assert(src_family == cmd_buffer->queue_family_index ||
- dst_family == cmd_buffer->queue_family_index);
-
- if (cmd_buffer->queue_family_index == RADV_QUEUE_TRANSFER)
- return;
-
- if (cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
- (src_family == RADV_QUEUE_GENERAL ||
- dst_family == RADV_QUEUE_GENERAL))
- return;
- }
-
- unsigned src_queue_mask = radv_image_queue_family_mask(image, src_family, cmd_buffer->queue_family_index);
- unsigned dst_queue_mask = radv_image_queue_family_mask(image, dst_family, cmd_buffer->queue_family_index);
-
- if (image->surface.htile_size)
+ if (image->htile.size)
radv_handle_depth_image_transition(cmd_buffer, image, src_layout,
dst_layout, range, pending_clears);
if (image->cmask.size)
radv_handle_cmask_image_transition(cmd_buffer, image, src_layout,
- dst_layout, src_queue_mask,
- dst_queue_mask, range,
- pending_clears);
+ dst_layout, range, pending_clears);
if (image->surface.dcc_size)
radv_handle_dcc_image_transition(cmd_buffer, image, src_layout,
- dst_layout, src_queue_mask,
- dst_queue_mask, range,
- pending_clears);
+ dst_layout, range, pending_clears);
}
void radv_CmdPipelineBarrier(
@@ -3123,43 +2232,76 @@ void radv_CmdPipelineBarrier(
const VkImageMemoryBarrier* pImageMemoryBarriers)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- enum radv_cmd_flush_bits src_flush_bits = 0;
- enum radv_cmd_flush_bits dst_flush_bits = 0;
-
+ VkAccessFlags src_flags = 0;
+ VkAccessFlags dst_flags = 0;
+ uint32_t b;
for (uint32_t i = 0; i < memoryBarrierCount; i++) {
- src_flush_bits |= radv_src_access_flush(cmd_buffer, pMemoryBarriers[i].srcAccessMask);
- dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pMemoryBarriers[i].dstAccessMask,
- NULL);
+ src_flags |= pMemoryBarriers[i].srcAccessMask;
+ dst_flags |= pMemoryBarriers[i].dstAccessMask;
}
for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
- src_flush_bits |= radv_src_access_flush(cmd_buffer, pBufferMemoryBarriers[i].srcAccessMask);
- dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pBufferMemoryBarriers[i].dstAccessMask,
- NULL);
+ src_flags |= pBufferMemoryBarriers[i].srcAccessMask;
+ dst_flags |= pBufferMemoryBarriers[i].dstAccessMask;
}
for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image);
- src_flush_bits |= radv_src_access_flush(cmd_buffer, pImageMemoryBarriers[i].srcAccessMask);
- dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pImageMemoryBarriers[i].dstAccessMask,
- image);
- }
-
- radv_stage_flush(cmd_buffer, srcStageMask);
- cmd_buffer->state.flush_bits |= src_flush_bits;
+ src_flags |= pImageMemoryBarriers[i].srcAccessMask;
+ dst_flags |= pImageMemoryBarriers[i].dstAccessMask;
- for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
- RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image);
radv_handle_image_transition(cmd_buffer, image,
pImageMemoryBarriers[i].oldLayout,
pImageMemoryBarriers[i].newLayout,
- pImageMemoryBarriers[i].srcQueueFamilyIndex,
- pImageMemoryBarriers[i].dstQueueFamilyIndex,
- &pImageMemoryBarriers[i].subresourceRange,
+ pImageMemoryBarriers[i].subresourceRange,
0);
}
- cmd_buffer->state.flush_bits |= dst_flush_bits;
+ enum radv_cmd_flush_bits flush_bits = 0;
+
+ for_each_bit(b, src_flags) {
+ switch ((VkAccessFlagBits)(1 << b)) {
+ case VK_ACCESS_SHADER_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
+ break;
+ case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+ break;
+ case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ break;
+ case VK_ACCESS_TRANSFER_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+ break;
+ default:
+ break;
+ }
+ }
+
+ for_each_bit(b, dst_flags) {
+ switch ((VkAccessFlagBits)(1 << b)) {
+ case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
+ case VK_ACCESS_INDEX_READ_BIT:
+ case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
+ case VK_ACCESS_UNIFORM_READ_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1;
+ break;
+ case VK_ACCESS_SHADER_READ_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
+ break;
+ case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
+ case VK_ACCESS_TRANSFER_READ_BIT:
+ case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
+ flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | RADV_CMD_FLAG_INV_GLOBAL_L2;
+ default:
+ break;
+ }
+ }
+
+ flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
+
+ cmd_buffer->state.flush_bits |= flush_bits;
}
@@ -3173,12 +2315,12 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->device->ws->cs_add_buffer(cs, event->bo, 8);
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12);
+ unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12);
/* TODO: this is overkill. Probably should figure something out from
* the stage mask. */
- if (cmd_buffer->device->physical_device->rad_info.chip_class == CIK) {
+ if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class == CIK) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
EVENT_INDEX(5));
@@ -3240,7 +2382,7 @@ void radv_CmdWaitEvents(VkCommandBuffer commandBuffer,
cmd_buffer->device->ws->cs_add_buffer(cs, event->bo, 8);
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7);
+ unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7);
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
@@ -3260,9 +2402,7 @@ void radv_CmdWaitEvents(VkCommandBuffer commandBuffer,
radv_handle_image_transition(cmd_buffer, image,
pImageMemoryBarriers[i].oldLayout,
pImageMemoryBarriers[i].newLayout,
- pImageMemoryBarriers[i].srcQueueFamilyIndex,
- pImageMemoryBarriers[i].dstQueueFamilyIndex,
- &pImageMemoryBarriers[i].subresourceRange,
+ pImageMemoryBarriers[i].subresourceRange,
0);
}
diff --git a/lib/mesa/src/amd/vulkan/radv_cs.h b/lib/mesa/src/amd/vulkan/radv_cs.h
index 0990270f5..2c8935f30 100644
--- a/lib/mesa/src/amd/vulkan/radv_cs.h
+++ b/lib/mesa/src/amd/vulkan/radv_cs.h
@@ -43,7 +43,6 @@ static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsign
{
assert(reg < R600_CONTEXT_REG_OFFSET);
assert(cs->cdw + 2 + num <= cs->max_dw);
- assert(num);
radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
}
@@ -58,7 +57,6 @@ static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsig
{
assert(reg >= R600_CONTEXT_REG_OFFSET);
assert(cs->cdw + 2 + num <= cs->max_dw);
- assert(num);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
}
@@ -85,7 +83,6 @@ static inline void radeon_set_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned r
{
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
- assert(num);
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
}
@@ -100,7 +97,6 @@ static inline void radeon_set_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsig
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
- assert(num);
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
}
diff --git a/lib/mesa/src/amd/vulkan/radv_descriptor_set.c b/lib/mesa/src/amd/vulkan/radv_descriptor_set.c
index 48cb8c2a3..eb8b5d6e3 100644
--- a/lib/mesa/src/amd/vulkan/radv_descriptor_set.c
+++ b/lib/mesa/src/amd/vulkan/radv_descriptor_set.c
@@ -50,19 +50,18 @@ VkResult radv_CreateDescriptorSetLayout(
immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
}
- uint32_t samplers_offset = sizeof(struct radv_descriptor_set_layout) +
- (max_binding + 1) * sizeof(set_layout->binding[0]);
- size_t size = samplers_offset + immutable_sampler_count * 4 * sizeof(uint32_t);
+ size_t size = sizeof(struct radv_descriptor_set_layout) +
+ (max_binding + 1) * sizeof(set_layout->binding[0]) +
+ immutable_sampler_count * sizeof(struct radv_sampler *);
set_layout = vk_alloc2(&device->alloc, pAllocator, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!set_layout)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- set_layout->flags = pCreateInfo->flags;
-
/* We just allocate all the samplers at the end of the struct */
- uint32_t *samplers = (uint32_t*)&set_layout->binding[max_binding + 1];
+ struct radv_sampler **samplers =
+ (struct radv_sampler **)&set_layout->binding[max_binding + 1];
set_layout->binding_count = max_binding + 1;
set_layout->shader_stages = 0;
@@ -81,7 +80,6 @@ VkResult radv_CreateDescriptorSetLayout(
switch (binding->descriptorType) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- assert(!(pCreateInfo->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
set_layout->binding[b].dynamic_offset_count = 1;
set_layout->dynamic_shader_stages |= binding->stageFlags;
set_layout->binding[b].size = 0;
@@ -127,32 +125,23 @@ VkResult radv_CreateDescriptorSetLayout(
set_layout->binding[b].buffer_offset = buffer_count;
set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count;
- if (binding->pImmutableSamplers) {
- set_layout->binding[b].immutable_samplers_offset = samplers_offset;
- set_layout->binding[b].immutable_samplers_equal = true;
+ set_layout->size += binding->descriptorCount * set_layout->binding[b].size;
+ buffer_count += binding->descriptorCount * set_layout->binding[b].buffer_count;
+ dynamic_offset_count += binding->descriptorCount *
+ set_layout->binding[b].dynamic_offset_count;
+
+ if (binding->pImmutableSamplers) {
+ set_layout->binding[b].immutable_samplers = samplers;
+ samplers += binding->descriptorCount;
for (uint32_t i = 0; i < binding->descriptorCount; i++)
- memcpy(samplers + 4 * i, &radv_sampler_from_handle(binding->pImmutableSamplers[i])->state, 16);
- for (uint32_t i = 1; i < binding->descriptorCount; i++)
- if (memcmp(samplers + 4 * i, samplers, 16) != 0)
- set_layout->binding[b].immutable_samplers_equal = false;
-
- /* Don't reserve space for the samplers if they're not accessed. */
- if (set_layout->binding[b].immutable_samplers_equal) {
- if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
- set_layout->binding[b].size -= 32;
- else if (binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER)
- set_layout->binding[b].size -= 16;
- }
- samplers += 4 * binding->descriptorCount;
- samplers_offset += 4 * sizeof(uint32_t) * binding->descriptorCount;
+ set_layout->binding[b].immutable_samplers[i] =
+ radv_sampler_from_handle(binding->pImmutableSamplers[i]);
+ } else {
+ set_layout->binding[b].immutable_samplers = NULL;
}
- set_layout->size += binding->descriptorCount * set_layout->binding[b].size;
- buffer_count += binding->descriptorCount * set_layout->binding[b].buffer_count;
- dynamic_offset_count += binding->descriptorCount *
- set_layout->binding[b].dynamic_offset_count;
set_layout->shader_stages |= binding->stageFlags;
}
@@ -191,7 +180,7 @@ VkResult radv_CreatePipelineLayout(
{
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_pipeline_layout *layout;
- struct mesa_sha1 ctx;
+ struct mesa_sha1 *ctx;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);
@@ -205,7 +194,7 @@ VkResult radv_CreatePipelineLayout(
unsigned dynamic_offset_count = 0;
- _mesa_sha1_init(&ctx);
+ ctx = _mesa_sha1_init();
for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) {
RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout,
pCreateInfo->pSetLayouts[set]);
@@ -214,11 +203,8 @@ VkResult radv_CreatePipelineLayout(
layout->set[set].dynamic_offset_start = dynamic_offset_count;
for (uint32_t b = 0; b < set_layout->binding_count; b++) {
dynamic_offset_count += set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count;
- if (set_layout->binding[b].immutable_samplers_offset)
- _mesa_sha1_update(&ctx, radv_immutable_samplers(set_layout, set_layout->binding + b),
- set_layout->binding[b].array_size * 4 * sizeof(uint32_t));
}
- _mesa_sha1_update(&ctx, set_layout->binding,
+ _mesa_sha1_update(ctx, set_layout->binding,
sizeof(set_layout->binding[0]) * set_layout->binding_count);
}
@@ -231,9 +217,9 @@ VkResult radv_CreatePipelineLayout(
}
layout->push_constant_size = align(layout->push_constant_size, 16);
- _mesa_sha1_update(&ctx, &layout->push_constant_size,
+ _mesa_sha1_update(ctx, &layout->push_constant_size,
sizeof(layout->push_constant_size));
- _mesa_sha1_final(&ctx, layout->sha1);
+ _mesa_sha1_final(ctx, layout->sha1);
*pPipelineLayout = radv_pipeline_layout_to_handle(layout);
return VK_SUCCESS;
@@ -257,6 +243,7 @@ void radv_DestroyPipelineLayout(
static VkResult
radv_descriptor_set_create(struct radv_device *device,
struct radv_descriptor_pool *pool,
+ struct radv_cmd_buffer *cmd_buffer,
const struct radv_descriptor_set_layout *layout,
struct radv_descriptor_set **out_set)
{
@@ -287,53 +274,72 @@ radv_descriptor_set_create(struct radv_device *device,
if (layout->size) {
uint32_t layout_size = align_u32(layout->size, 32);
set->size = layout->size;
+ if (!cmd_buffer) {
+ if (pool->current_offset + layout_size <= pool->size) {
+ set->bo = pool->bo;
+ set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset);
+ set->va = device->ws->buffer_get_va(set->bo) + pool->current_offset;
+ pool->current_offset += layout_size;
+
+ } else {
+ int entry = pool->free_list, prev_entry = -1;
+ uint32_t offset;
+ while (entry >= 0) {
+ if (pool->free_nodes[entry].size >= layout_size) {
+ if (prev_entry >= 0)
+ pool->free_nodes[prev_entry].next = pool->free_nodes[entry].next;
+ else
+ pool->free_list = pool->free_nodes[entry].next;
+ break;
+ }
+ prev_entry = entry;
+ entry = pool->free_nodes[entry].next;
+ }
- /* try to allocate linearly first, so that we don't spend
- * time looking for gaps if the app only allocates &
- * resets via the pool. */
- if (pool->current_offset + layout_size <= pool->size) {
- set->bo = pool->bo;
- set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset);
- set->va = device->ws->buffer_get_va(set->bo) + pool->current_offset;
- pool->current_offset += layout_size;
- list_addtail(&set->vram_list, &pool->vram_list);
- } else {
- uint64_t offset = 0;
- struct list_head *prev = &pool->vram_list;
- struct radv_descriptor_set *cur;
- LIST_FOR_EACH_ENTRY(cur, &pool->vram_list, vram_list) {
- uint64_t start = (uint8_t*)cur->mapped_ptr - pool->mapped_ptr;
- if (start - offset >= layout_size)
- break;
-
- offset = start + cur->size;
- prev = &cur->vram_list;
- }
+ if (entry < 0) {
+ vk_free2(&device->alloc, NULL, set);
+ return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+ offset = pool->free_nodes[entry].offset;
+ pool->free_nodes[entry].next = pool->full_list;
+ pool->full_list = entry;
- if (pool->size - offset < layout_size) {
+ set->bo = pool->bo;
+ set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset);
+ set->va = device->ws->buffer_get_va(set->bo) + offset;
+ }
+ } else {
+ unsigned bo_offset;
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, set->size, 32,
+ &bo_offset,
+ (void**)&set->mapped_ptr)) {
vk_free2(&device->alloc, NULL, set->dynamic_descriptors);
vk_free2(&device->alloc, NULL, set);
- return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
- set->bo = pool->bo;
- set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset);
- set->va = device->ws->buffer_get_va(set->bo) + offset;
- list_add(&set->vram_list, prev);
+
+ set->va = device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
+ set->va += bo_offset;
}
}
+ if (pool)
+ list_add(&set->descriptor_pool, &pool->descriptor_sets);
+ else
+ list_inithead(&set->descriptor_pool);
+
for (unsigned i = 0; i < layout->binding_count; ++i) {
- if (!layout->binding[i].immutable_samplers_offset ||
- layout->binding[i].immutable_samplers_equal)
+ if (!layout->binding[i].immutable_samplers)
continue;
unsigned offset = layout->binding[i].offset / 4;
if (layout->binding[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
offset += 16;
- const uint32_t *samplers = (const uint32_t*)((const char*)layout + layout->binding[i].immutable_samplers_offset);
for (unsigned j = 0; j < layout->binding[i].array_size; ++j) {
- memcpy(set->mapped_ptr + offset, samplers + 4 * j, 16);
+ struct radv_sampler* sampler = layout->binding[i].immutable_samplers[j];
+
+ memcpy(set->mapped_ptr + offset, &sampler->state, 16);
offset += layout->binding[i].size / 4;
}
@@ -348,13 +354,46 @@ radv_descriptor_set_destroy(struct radv_device *device,
struct radv_descriptor_set *set,
bool free_bo)
{
- if (free_bo && set->size)
- list_del(&set->vram_list);
+ if (free_bo && set->size) {
+ assert(pool->full_list >= 0);
+ int next = pool->free_nodes[pool->full_list].next;
+ pool->free_nodes[pool->full_list].next = pool->free_list;
+ pool->free_nodes[pool->full_list].offset = (uint8_t*)set->mapped_ptr - pool->mapped_ptr;
+ pool->free_nodes[pool->full_list].size = align_u32(set->size, 32);
+ pool->free_list = pool->full_list;
+ pool->full_list = next;
+ }
if (set->dynamic_descriptors)
vk_free2(&device->alloc, NULL, set->dynamic_descriptors);
+ if (!list_empty(&set->descriptor_pool))
+ list_del(&set->descriptor_pool);
vk_free2(&device->alloc, NULL, set);
}
+VkResult
+radv_temp_descriptor_set_create(struct radv_device *device,
+ struct radv_cmd_buffer *cmd_buffer,
+ VkDescriptorSetLayout _layout,
+ VkDescriptorSet *_set)
+{
+ RADV_FROM_HANDLE(radv_descriptor_set_layout, layout, _layout);
+ struct radv_descriptor_set *set;
+ VkResult ret;
+
+ ret = radv_descriptor_set_create(device, NULL, cmd_buffer, layout, &set);
+ *_set = radv_descriptor_set_to_handle(set);
+ return ret;
+}
+
+void
+radv_temp_descriptor_set_destroy(struct radv_device *device,
+ VkDescriptorSet _set)
+{
+ RADV_FROM_HANDLE(radv_descriptor_set, set, _set);
+
+ radv_descriptor_set_destroy(device, NULL, set, false);
+}
+
VkResult radv_CreateDescriptorPool(
VkDevice _device,
const VkDescriptorPoolCreateInfo* pCreateInfo,
@@ -363,7 +402,9 @@ VkResult radv_CreateDescriptorPool(
{
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_descriptor_pool *pool;
- int size = sizeof(struct radv_descriptor_pool);
+ unsigned max_sets = pCreateInfo->maxSets * 2;
+ int size = sizeof(struct radv_descriptor_pool) +
+ max_sets * sizeof(struct radv_descriptor_pool_free_node);
uint64_t bo_size = 0;
pool = vk_alloc2(&device->alloc, pAllocator, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -372,6 +413,14 @@ VkResult radv_CreateDescriptorPool(
memset(pool, 0, sizeof(*pool));
+ pool->free_list = -1;
+ pool->full_list = 0;
+ pool->free_nodes[max_sets - 1].next = -1;
+ pool->max_sets = max_sets;
+
+ for (int i = 0; i + 1 < max_sets; ++i)
+ pool->free_nodes[i].next = i + 1;
+
for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
switch(pCreateInfo->pPoolSizes[i].type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -406,7 +455,7 @@ VkResult radv_CreateDescriptorPool(
}
pool->size = bo_size;
- list_inithead(&pool->vram_list);
+ list_inithead(&pool->descriptor_sets);
*pDescriptorPool = radv_descriptor_pool_to_handle(pool);
return VK_SUCCESS;
}
@@ -423,7 +472,7 @@ void radv_DestroyDescriptorPool(
return;
list_for_each_entry_safe(struct radv_descriptor_set, set,
- &pool->vram_list, vram_list) {
+ &pool->descriptor_sets, descriptor_pool) {
radv_descriptor_set_destroy(device, pool, set, false);
}
@@ -441,13 +490,17 @@ VkResult radv_ResetDescriptorPool(
RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool);
list_for_each_entry_safe(struct radv_descriptor_set, set,
- &pool->vram_list, vram_list) {
+ &pool->descriptor_sets, descriptor_pool) {
radv_descriptor_set_destroy(device, pool, set, false);
}
- list_inithead(&pool->vram_list);
-
pool->current_offset = 0;
+ pool->free_list = -1;
+ pool->full_list = 0;
+ pool->free_nodes[pool->max_sets - 1].next = -1;
+
+ for (int i = 0; i + 1 < pool->max_sets; ++i)
+ pool->free_nodes[i].next = i + 1;
return VK_SUCCESS;
}
@@ -469,9 +522,7 @@ VkResult radv_AllocateDescriptorSets(
RADV_FROM_HANDLE(radv_descriptor_set_layout, layout,
pAllocateInfo->pSetLayouts[i]);
- assert(!(layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
-
- result = radv_descriptor_set_create(device, pool, layout, &set);
+ result = radv_descriptor_set_create(device, pool, NULL, layout, &set);
if (result != VK_SUCCESS)
break;
@@ -503,7 +554,6 @@ VkResult radv_FreeDescriptorSets(
}
static void write_texel_buffer_descriptor(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
unsigned *dst,
struct radeon_winsys_bo **buffer_list,
const VkBufferView _buffer_view)
@@ -511,15 +561,10 @@ static void write_texel_buffer_descriptor(struct radv_device *device,
RADV_FROM_HANDLE(radv_buffer_view, buffer_view, _buffer_view);
memcpy(dst, buffer_view->state, 4 * 4);
-
- if (cmd_buffer)
- device->ws->cs_add_buffer(cmd_buffer->cs, buffer_view->bo, 7);
- else
- *buffer_list = buffer_view->bo;
+ *buffer_list = buffer_view->bo;
}
static void write_buffer_descriptor(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
unsigned *dst,
struct radeon_winsys_bo **buffer_list,
const VkDescriptorBufferInfo *buffer_info)
@@ -542,10 +587,7 @@ static void write_buffer_descriptor(struct radv_device *device,
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- if (cmd_buffer)
- device->ws->cs_add_buffer(cmd_buffer->cs, buffer->bo, 7);
- else
- *buffer_list = buffer->bo;
+ *buffer_list = buffer->bo;
}
static void write_dynamic_buffer_descriptor(struct radv_device *device,
@@ -569,7 +611,6 @@ static void write_dynamic_buffer_descriptor(struct radv_device *device,
static void
write_image_descriptor(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
unsigned *dst,
struct radeon_winsys_bo **buffer_list,
const VkDescriptorImageInfo *image_info)
@@ -577,16 +618,11 @@ write_image_descriptor(struct radv_device *device,
RADV_FROM_HANDLE(radv_image_view, iview, image_info->imageView);
memcpy(dst, iview->descriptor, 8 * 4);
memcpy(dst + 8, iview->fmask_descriptor, 8 * 4);
-
- if (cmd_buffer)
- device->ws->cs_add_buffer(cmd_buffer->cs, iview->bo, 7);
- else
- *buffer_list = iview->bo;
+ *buffer_list = iview->bo;
}
static void
write_combined_image_sampler_descriptor(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
unsigned *dst,
struct radeon_winsys_bo **buffer_list,
const VkDescriptorImageInfo *image_info,
@@ -594,7 +630,7 @@ write_combined_image_sampler_descriptor(struct radv_device *device,
{
RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler);
- write_image_descriptor(device, cmd_buffer, dst, buffer_list, image_info);
+ write_image_descriptor(device, dst, buffer_list, image_info);
/* copy over sampler state */
if (has_sampler)
memcpy(dst + 16, sampler->state, 16);
@@ -610,31 +646,22 @@ write_sampler_descriptor(struct radv_device *device,
memcpy(dst, sampler->state, 16);
}
-void radv_update_descriptor_sets(
- struct radv_device* device,
- struct radv_cmd_buffer* cmd_buffer,
- VkDescriptorSet dstSetOverride,
+void radv_UpdateDescriptorSets(
+ VkDevice _device,
uint32_t descriptorWriteCount,
const VkWriteDescriptorSet* pDescriptorWrites,
uint32_t descriptorCopyCount,
const VkCopyDescriptorSet* pDescriptorCopies)
{
+ RADV_FROM_HANDLE(radv_device, device, _device);
uint32_t i, j;
for (i = 0; i < descriptorWriteCount; i++) {
const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i];
- RADV_FROM_HANDLE(radv_descriptor_set, set,
- dstSetOverride ? dstSetOverride : writeset->dstSet);
+ RADV_FROM_HANDLE(radv_descriptor_set, set, writeset->dstSet);
const struct radv_descriptor_set_binding_layout *binding_layout =
set->layout->binding + writeset->dstBinding;
uint32_t *ptr = set->mapped_ptr;
struct radeon_winsys_bo **buffer_list = set->descriptors;
- /* Immutable samplers are not copied into push descriptors when they are
- * allocated, so if we are writing push descriptors we have to copy the
- * immutable samplers into them now.
- */
- const bool copy_immutable_samplers = cmd_buffer &&
- binding_layout->immutable_samplers_offset && !binding_layout->immutable_samplers_equal;
- const uint32_t *samplers = radv_immutable_samplers(set->layout, binding_layout);
ptr += binding_layout->offset / 4;
ptr += binding_layout->size * writeset->dstArrayElement / 4;
@@ -646,44 +673,35 @@ void radv_update_descriptor_sets(
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
unsigned idx = writeset->dstArrayElement + j;
idx += binding_layout->dynamic_offset_offset;
- assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
write_dynamic_buffer_descriptor(device, set->dynamic_descriptors + idx,
buffer_list, writeset->pBufferInfo + j);
break;
}
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- write_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
+ write_buffer_descriptor(device, ptr, buffer_list,
writeset->pBufferInfo + j);
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- write_texel_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
+ write_texel_buffer_descriptor(device, ptr, buffer_list,
writeset->pTexelBufferView[j]);
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- write_image_descriptor(device, cmd_buffer, ptr, buffer_list,
+ write_image_descriptor(device, ptr, buffer_list,
writeset->pImageInfo + j);
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- write_combined_image_sampler_descriptor(device, cmd_buffer, ptr, buffer_list,
+ write_combined_image_sampler_descriptor(device, ptr, buffer_list,
writeset->pImageInfo + j,
- !binding_layout->immutable_samplers_offset);
- if (copy_immutable_samplers) {
- const unsigned idx = writeset->dstArrayElement + j;
- memcpy(ptr + 16, samplers + 4 * idx, 16);
- }
+ !binding_layout->immutable_samplers);
break;
case VK_DESCRIPTOR_TYPE_SAMPLER:
- if (!binding_layout->immutable_samplers_offset) {
- write_sampler_descriptor(device, ptr,
- writeset->pImageInfo + j);
- } else if (copy_immutable_samplers) {
- unsigned idx = writeset->dstArrayElement + j;
- memcpy(ptr, samplers + 4 * idx, 16);
- }
+ assert(!binding_layout->immutable_samplers);
+ write_sampler_descriptor(device, ptr,
+ writeset->pImageInfo + j);
break;
default:
unreachable("unimplemented descriptor type");
@@ -697,180 +715,3 @@ void radv_update_descriptor_sets(
if (descriptorCopyCount)
radv_finishme("copy descriptors");
}
-
-void radv_UpdateDescriptorSets(
- VkDevice _device,
- uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet* pDescriptorWrites,
- uint32_t descriptorCopyCount,
- const VkCopyDescriptorSet* pDescriptorCopies)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
-
- radv_update_descriptor_sets(device, NULL, VK_NULL_HANDLE, descriptorWriteCount, pDescriptorWrites,
- descriptorCopyCount, pDescriptorCopies);
-}
-
-VkResult radv_CreateDescriptorUpdateTemplateKHR(VkDevice _device,
- const VkDescriptorUpdateTemplateCreateInfoKHR *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkDescriptorUpdateTemplateKHR *pDescriptorUpdateTemplate)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, pCreateInfo->descriptorSetLayout);
- const uint32_t entry_count = pCreateInfo->descriptorUpdateEntryCount;
- const size_t size = sizeof(struct radv_descriptor_update_template) +
- sizeof(struct radv_descriptor_update_template_entry) * entry_count;
- struct radv_descriptor_update_template *templ;
- uint32_t i;
-
- templ = vk_alloc2(&device->alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!templ)
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-
- templ->entry_count = entry_count;
-
- for (i = 0; i < entry_count; i++) {
- const VkDescriptorUpdateTemplateEntryKHR *entry = &pCreateInfo->pDescriptorUpdateEntries[i];
- const struct radv_descriptor_set_binding_layout *binding_layout =
- set_layout->binding + entry->dstBinding;
- const uint32_t buffer_offset = binding_layout->buffer_offset +
- binding_layout->buffer_count * entry->dstArrayElement;
- const uint32_t *immutable_samplers = NULL;
- uint32_t dst_offset;
- uint32_t dst_stride;
-
- /* dst_offset is an offset into dynamic_descriptors when the descriptor
- is dynamic, and an offset into mapped_ptr otherwise */
- switch (entry->descriptorType) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- assert(pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR);
- dst_offset = binding_layout->dynamic_offset_offset + entry->dstArrayElement;
- dst_stride = 0; /* Not used */
- break;
- default:
- switch (entry->descriptorType) {
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- /* Immutable samplers are copied into push descriptors when they are pushed */
- if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR &&
- binding_layout->immutable_samplers_offset && !binding_layout->immutable_samplers_equal) {
- immutable_samplers = radv_immutable_samplers(set_layout, binding_layout) + entry->dstArrayElement * 4;
- }
- break;
- default:
- break;
- }
- dst_offset = binding_layout->offset / 4 + binding_layout->size * entry->dstArrayElement / 4;
- dst_stride = binding_layout->size / 4;
- break;
- }
-
- templ->entry[i] = (struct radv_descriptor_update_template_entry) {
- .descriptor_type = entry->descriptorType,
- .descriptor_count = entry->descriptorCount,
- .src_offset = entry->offset,
- .src_stride = entry->stride,
- .dst_offset = dst_offset,
- .dst_stride = dst_stride,
- .buffer_offset = buffer_offset,
- .buffer_count = binding_layout->buffer_count,
- .has_sampler = !binding_layout->immutable_samplers_offset,
- .immutable_samplers = immutable_samplers
- };
- }
-
- *pDescriptorUpdateTemplate = radv_descriptor_update_template_to_handle(templ);
- return VK_SUCCESS;
-}
-
-void radv_DestroyDescriptorUpdateTemplateKHR(VkDevice _device,
- VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate,
- const VkAllocationCallbacks *pAllocator)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
-
- if (!templ)
- return;
-
- vk_free2(&device->alloc, pAllocator, templ);
-}
-
-void radv_update_descriptor_set_with_template(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- struct radv_descriptor_set *set,
- VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate,
- const void *pData)
-{
- RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
- uint32_t i;
-
- for (i = 0; i < templ->entry_count; ++i) {
- struct radeon_winsys_bo **buffer_list = set->descriptors + templ->entry[i].buffer_offset;
- uint32_t *pDst = set->mapped_ptr + templ->entry[i].dst_offset;
- const uint8_t *pSrc = ((const uint8_t *) pData) + templ->entry[i].src_offset;
- uint32_t j;
-
- for (j = 0; j < templ->entry[i].descriptor_count; ++j) {
- switch (templ->entry[i].descriptor_type) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
- const unsigned idx = templ->entry[i].dst_offset + j;
- assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
- write_dynamic_buffer_descriptor(device, set->dynamic_descriptors + idx,
- buffer_list, (struct VkDescriptorBufferInfo *) pSrc);
- break;
- }
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- write_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
- (struct VkDescriptorBufferInfo *) pSrc);
- break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- write_texel_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
- *(VkBufferView *) pSrc);
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- write_image_descriptor(device, cmd_buffer, pDst, buffer_list,
- (struct VkDescriptorImageInfo *) pSrc);
- break;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- write_combined_image_sampler_descriptor(device, cmd_buffer, pDst, buffer_list,
- (struct VkDescriptorImageInfo *) pSrc,
- templ->entry[i].has_sampler);
- if (templ->entry[i].immutable_samplers)
- memcpy(pDst + 16, templ->entry[i].immutable_samplers + 4 * j, 16);
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- if (templ->entry[i].has_sampler)
- write_sampler_descriptor(device, pDst,
- (struct VkDescriptorImageInfo *) pSrc);
- else if (templ->entry[i].immutable_samplers)
- memcpy(pDst, templ->entry[i].immutable_samplers + 4 * j, 16);
- break;
- default:
- unreachable("unimplemented descriptor type");
- break;
- }
- pSrc += templ->entry[i].src_stride;
- pDst += templ->entry[i].dst_stride;
- buffer_list += templ->entry[i].buffer_count;
- }
- }
-}
-
-void radv_UpdateDescriptorSetWithTemplateKHR(VkDevice _device,
- VkDescriptorSet descriptorSet,
- VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate,
- const void *pData)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_set, set, descriptorSet);
-
- radv_update_descriptor_set_with_template(device, NULL, set, descriptorUpdateTemplate, pData);
-}
diff --git a/lib/mesa/src/amd/vulkan/radv_descriptor_set.h b/lib/mesa/src/amd/vulkan/radv_descriptor_set.h
index a9f4bc649..067482275 100644
--- a/lib/mesa/src/amd/vulkan/radv_descriptor_set.h
+++ b/lib/mesa/src/amd/vulkan/radv_descriptor_set.h
@@ -32,39 +32,34 @@ struct radv_descriptor_set_binding_layout {
VkDescriptorType type;
/* Number of array elements in this binding */
- uint32_t array_size;
+ uint16_t array_size;
- uint32_t offset;
- uint32_t buffer_offset;
+ uint16_t offset;
+ uint16_t buffer_offset;
uint16_t dynamic_offset_offset;
/* redundant with the type, each for a single array element */
- uint32_t size;
- uint32_t buffer_count;
+ uint16_t size;
+ uint16_t buffer_count;
uint16_t dynamic_offset_count;
- /* Offset in the radv_descriptor_set_layout of the immutable samplers, or 0
- * if there are no immutable samplers. */
- uint32_t immutable_samplers_offset;
- bool immutable_samplers_equal;
+ /* Immutable samplers (or NULL if no immutable samplers) */
+ struct radv_sampler **immutable_samplers;
};
struct radv_descriptor_set_layout {
- /* The create flags for this descriptor set layout */
- VkDescriptorSetLayoutCreateFlags flags;
-
/* Number of bindings in this descriptor set */
- uint32_t binding_count;
+ uint16_t binding_count;
/* Total size of the descriptor set with room for all array entries */
- uint32_t size;
+ uint16_t size;
/* Shader stages affected by this descriptor set */
uint16_t shader_stages;
uint16_t dynamic_shader_stages;
/* Number of buffers in this descriptor set */
- uint32_t buffer_count;
+ uint16_t buffer_count;
/* Number of dynamic offsets used by this descriptor set */
uint16_t dynamic_offset_count;
@@ -87,9 +82,4 @@ struct radv_pipeline_layout {
unsigned char sha1[20];
};
-static inline const uint32_t *
-radv_immutable_samplers(const struct radv_descriptor_set_layout *set,
- const struct radv_descriptor_set_binding_layout *binding) {
- return (const uint32_t*)((const char*)set + binding->immutable_samplers_offset);
-}
#endif /* RADV_DESCRIPTOR_SET_H */
diff --git a/lib/mesa/src/amd/vulkan/radv_device.c b/lib/mesa/src/amd/vulkan/radv_device.c
index 33c75c2a3..94a2ef006 100644
--- a/lib/mesa/src/amd/vulkan/radv_device.c
+++ b/lib/mesa/src/amd/vulkan/radv_device.c
@@ -30,10 +30,8 @@
#include <unistd.h>
#include <fcntl.h>
#include "radv_private.h"
-#include "radv_cs.h"
-#include "util/disk_cache.h"
#include "util/strtod.h"
-#include "util/vk_util.h"
+
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
@@ -42,150 +40,9 @@
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
+#include "radv_timestamp.h"
#include "util/debug.h"
-
-static int
-radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
-{
- uint32_t mesa_timestamp, llvm_timestamp;
- uint16_t f = family;
- memset(uuid, 0, VK_UUID_SIZE);
- if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
- !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
- return -1;
-
- memcpy(uuid, &mesa_timestamp, 4);
- memcpy((char*)uuid + 4, &llvm_timestamp, 4);
- memcpy((char*)uuid + 8, &f, 2);
- snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
- return 0;
-}
-
-static const VkExtensionProperties instance_extensions[] = {
- {
- .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
- .specVersion = 25,
- },
-#ifdef VK_USE_PLATFORM_XCB_KHR
- {
- .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
- .specVersion = 6,
- },
-#endif
-#ifdef VK_USE_PLATFORM_XLIB_KHR
- {
- .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
- .specVersion = 6,
- },
-#endif
-#ifdef VK_USE_PLATFORM_WAYLAND_KHR
- {
- .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
- .specVersion = 6,
- },
-#endif
- {
- .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
- .specVersion = 1,
- },
-};
-
-static const VkExtensionProperties common_device_extensions[] = {
- {
- .extensionName = VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
- .specVersion = 68,
- },
- {
- .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
- .specVersion = 1,
- },
-};
-
-static VkResult
-radv_extensions_register(struct radv_instance *instance,
- struct radv_extensions *extensions,
- const VkExtensionProperties *new_ext,
- uint32_t num_ext)
-{
- size_t new_size;
- VkExtensionProperties *new_ptr;
-
- assert(new_ext && num_ext > 0);
-
- if (!new_ext)
- return VK_ERROR_INITIALIZATION_FAILED;
-
- new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
- new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
- new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
-
- /* Old array continues to be valid, update nothing */
- if (!new_ptr)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- memcpy(&new_ptr[extensions->num_ext], new_ext,
- num_ext * sizeof(VkExtensionProperties));
- extensions->ext_array = new_ptr;
- extensions->num_ext += num_ext;
-
- return VK_SUCCESS;
-}
-
-static void
-radv_extensions_finish(struct radv_instance *instance,
- struct radv_extensions *extensions)
-{
- assert(extensions);
-
- if (!extensions)
- radv_loge("Attemted to free invalid extension struct\n");
-
- if (extensions->ext_array)
- vk_free(&instance->alloc, extensions->ext_array);
-}
-
-static bool
-is_extension_enabled(const VkExtensionProperties *extensions,
- size_t num_ext,
- const char *name)
-{
- assert(extensions && name);
-
- for (uint32_t i = 0; i < num_ext; i++) {
- if (strcmp(name, extensions[i].extensionName) == 0)
- return true;
- }
-
- return false;
-}
+struct radv_dispatch_table dtable;
static VkResult
radv_physical_device_init(struct radv_physical_device *device,
@@ -198,7 +55,8 @@ radv_physical_device_init(struct radv_physical_device *device,
fd = open(path, O_RDWR | O_CLOEXEC);
if (fd < 0)
- return VK_ERROR_INCOMPATIBLE_DRIVER;
+ return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
+ "failed to open %s: %m", path);
version = drmGetVersion(fd);
if (!version) {
@@ -219,13 +77,11 @@ radv_physical_device_init(struct radv_physical_device *device,
assert(strlen(path) < ARRAY_SIZE(device->path));
strncpy(device->path, path, ARRAY_SIZE(device->path));
- device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
+ device->ws = radv_amdgpu_winsys_create(fd);
if (!device->ws) {
result = VK_ERROR_INCOMPATIBLE_DRIVER;
goto fail;
}
-
- device->local_fd = fd;
device->ws->query_info(device->ws, &device->rad_info);
result = radv_init_wsi(device);
if (result != VK_SUCCESS) {
@@ -233,24 +89,8 @@ radv_physical_device_init(struct radv_physical_device *device,
goto fail;
}
- if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
- radv_finish_wsi(device);
- device->ws->destroy(device->ws);
- result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
- "cannot generate UUID");
- goto fail;
- }
-
- result = radv_extensions_register(instance,
- &device->extensions,
- common_device_extensions,
- ARRAY_SIZE(common_device_extensions));
- if (result != VK_SUCCESS)
- goto fail;
-
fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
device->name = device->rad_info.name;
-
return VK_SUCCESS;
fail:
@@ -261,12 +101,41 @@ fail:
static void
radv_physical_device_finish(struct radv_physical_device *device)
{
- radv_extensions_finish(device->instance, &device->extensions);
radv_finish_wsi(device);
device->ws->destroy(device->ws);
- close(device->local_fd);
}
+static const VkExtensionProperties global_extensions[] = {
+ {
+ .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
+ .specVersion = 25,
+ },
+#ifdef VK_USE_PLATFORM_XCB_KHR
+ {
+ .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
+ .specVersion = 6,
+ },
+#endif
+#ifdef VK_USE_PLATFORM_XLIB_KHR
+ {
+ .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
+ .specVersion = 6,
+ },
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+ {
+ .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
+ .specVersion = 5,
+ },
+#endif
+};
+
+static const VkExtensionProperties device_extensions[] = {
+ {
+ .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
+ .specVersion = 68,
+ },
+};
static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
@@ -295,20 +164,6 @@ static const VkAllocationCallbacks default_alloc = {
.pfnFree = default_free_func,
};
-static const struct debug_control radv_debug_options[] = {
- {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
- {"nodcc", RADV_DEBUG_NO_DCC},
- {"shaders", RADV_DEBUG_DUMP_SHADERS},
- {"nocache", RADV_DEBUG_NO_CACHE},
- {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
- {"nohiz", RADV_DEBUG_NO_HIZ},
- {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
- {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
- {"allbos", RADV_DEBUG_ALL_BOS},
- {"noibs", RADV_DEBUG_NO_IBS},
- {NULL, 0}
-};
-
VkResult radv_CreateInstance(
const VkInstanceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
@@ -336,9 +191,15 @@ VkResult radv_CreateInstance(
}
for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
- if (!is_extension_enabled(instance_extensions,
- ARRAY_SIZE(instance_extensions),
- pCreateInfo->ppEnabledExtensionNames[i]))
+ bool found = false;
+ for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) {
+ if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
+ global_extensions[j].extensionName) == 0) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
}
@@ -347,8 +208,6 @@ VkResult radv_CreateInstance(
if (!instance)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- memset(instance, 0, sizeof(*instance));
-
instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
if (pAllocator)
@@ -363,9 +222,6 @@ VkResult radv_CreateInstance(
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
- instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
- radv_debug_options);
-
*pInstance = radv_instance_to_handle(instance);
return VK_SUCCESS;
@@ -377,11 +233,10 @@ void radv_DestroyInstance(
{
RADV_FROM_HANDLE(radv_instance, instance, _instance);
- if (!instance)
- return;
-
- for (int i = 0; i < instance->physicalDeviceCount; ++i) {
- radv_physical_device_finish(instance->physicalDevices + i);
+ if (instance->physicalDeviceCount > 0) {
+ /* We support at most one physical device. */
+ assert(instance->physicalDeviceCount == 1);
+ radv_physical_device_finish(&instance->physicalDevice);
}
VG(VALGRIND_DESTROY_MEMPOOL(instance));
@@ -391,40 +246,6 @@ void radv_DestroyInstance(
vk_free(&instance->alloc, instance);
}
-static VkResult
-radv_enumerate_devices(struct radv_instance *instance)
-{
- /* TODO: Check for more devices ? */
- drmDevicePtr devices[8];
- VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
- int max_devices;
-
- instance->physicalDeviceCount = 0;
-
- max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
- if (max_devices < 1)
- return VK_ERROR_INCOMPATIBLE_DRIVER;
-
- for (unsigned i = 0; i < (unsigned)max_devices; i++) {
- if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
- devices[i]->bustype == DRM_BUS_PCI &&
- devices[i]->deviceinfo.pci->vendor_id == 0x1002) {
-
- result = radv_physical_device_init(instance->physicalDevices +
- instance->physicalDeviceCount,
- instance,
- devices[i]->nodes[DRM_NODE_RENDER]);
- if (result == VK_SUCCESS)
- ++instance->physicalDeviceCount;
- else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
- break;
- }
- }
- drmFreeDevices(devices, max_devices);
-
- return result;
-}
-
VkResult radv_EnumeratePhysicalDevices(
VkInstance _instance,
uint32_t* pPhysicalDeviceCount,
@@ -434,22 +255,53 @@ VkResult radv_EnumeratePhysicalDevices(
VkResult result;
if (instance->physicalDeviceCount < 0) {
- result = radv_enumerate_devices(instance);
- if (result != VK_SUCCESS &&
- result != VK_ERROR_INCOMPATIBLE_DRIVER)
+ char path[20];
+ for (unsigned i = 0; i < 8; i++) {
+ snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
+ result = radv_physical_device_init(&instance->physicalDevice,
+ instance, path);
+ if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
+ break;
+ }
+
+ if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
+ instance->physicalDeviceCount = 0;
+ } else if (result == VK_SUCCESS) {
+ instance->physicalDeviceCount = 1;
+ } else {
return result;
+ }
}
+ /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL;
+ * otherwise it's an inout parameter.
+ *
+ * The Vulkan spec (git aaed022) says:
+ *
+ * pPhysicalDeviceCount is a pointer to an unsigned integer variable
+ * that is initialized with the number of devices the application is
+ * prepared to receive handles to. pPhysicalDevices is a pointer to
+ * an array of at least this many VkPhysicalDevice handles [...].
+ *
+ * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices
+ * overwrites the contents of the variable pointed to by
+ * pPhysicalDeviceCount with the number of physical devices in the
+ * instance; otherwise, vkEnumeratePhysicalDevices overwrites
+ * pPhysicalDeviceCount with the number of physical handles written to
+ * pPhysicalDevices.
+ */
if (!pPhysicalDevices) {
*pPhysicalDeviceCount = instance->physicalDeviceCount;
+ } else if (*pPhysicalDeviceCount >= 1) {
+ pPhysicalDevices[0] = radv_physical_device_to_handle(&instance->physicalDevice);
+ *pPhysicalDeviceCount = 1;
+ } else if (*pPhysicalDeviceCount < instance->physicalDeviceCount) {
+ return VK_INCOMPLETE;
} else {
- *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
- for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
- pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
+ *pPhysicalDeviceCount = 0;
}
- return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
- : VK_SUCCESS;
+ return VK_SUCCESS;
}
void radv_GetPhysicalDeviceFeatures(
@@ -465,8 +317,8 @@ void radv_GetPhysicalDeviceFeatures(
.fullDrawIndexUint32 = true,
.imageCubeArray = true,
.independentBlend = true,
- .geometryShader = true,
- .tessellationShader = true,
+ .geometryShader = false,
+ .tessellationShader = false,
.sampleRateShading = false,
.dualSrcBlend = true,
.logicOp = true,
@@ -479,63 +331,41 @@ void radv_GetPhysicalDeviceFeatures(
.wideLines = true,
.largePoints = true,
.alphaToOne = true,
- .multiViewport = true,
- .samplerAnisotropy = true,
+ .multiViewport = false,
+ .samplerAnisotropy = false, /* FINISHME */
.textureCompressionETC2 = false,
.textureCompressionASTC_LDR = false,
.textureCompressionBC = true,
.occlusionQueryPrecise = true,
- .pipelineStatisticsQuery = true,
+ .pipelineStatisticsQuery = false,
.vertexPipelineStoresAndAtomics = true,
.fragmentStoresAndAtomics = true,
.shaderTessellationAndGeometryPointSize = true,
- .shaderImageGatherExtended = true,
- .shaderStorageImageExtendedFormats = true,
+ .shaderImageGatherExtended = false,
+ .shaderStorageImageExtendedFormats = false,
.shaderStorageImageMultisample = false,
.shaderUniformBufferArrayDynamicIndexing = true,
.shaderSampledImageArrayDynamicIndexing = true,
.shaderStorageBufferArrayDynamicIndexing = true,
.shaderStorageImageArrayDynamicIndexing = true,
- .shaderStorageImageReadWithoutFormat = true,
+ .shaderStorageImageReadWithoutFormat = false,
.shaderStorageImageWriteWithoutFormat = true,
.shaderClipDistance = true,
.shaderCullDistance = true,
- .shaderFloat64 = true,
+ .shaderFloat64 = false,
.shaderInt64 = false,
.shaderInt16 = false,
- .sparseBinding = true,
- .variableMultisampleRate = true,
- .inheritedQueries = true,
+ .alphaToOne = true,
+ .variableMultisampleRate = false,
+ .inheritedQueries = false,
};
}
-void radv_GetPhysicalDeviceFeatures2KHR(
- VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceFeatures2KHR *pFeatures)
-{
- return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
-}
-
-static uint32_t radv_get_driver_version()
-{
- const char *minor_string = strchr(VERSION, '.');
- const char *patch_string = minor_string ? strchr(minor_string + 1, ','): NULL;
- int major = atoi(VERSION);
- int minor = minor_string ? atoi(minor_string + 1) : 0;
- int patch = patch_string ? atoi(patch_string + 1) : 0;
- if (strstr(VERSION, "devel")) {
- if (patch == 0) {
- patch = 99;
- if (minor == 0) {
- minor = 99;
- --major;
- } else
- --minor;
- } else
- --patch;
- }
- uint32_t version = VK_MAKE_VERSION(major, minor, patch);
- return version;
+void
+radv_device_get_cache_uuid(void *uuid)
+{
+ memset(uuid, 0, VK_UUID_SIZE);
+ snprintf(uuid, VK_UUID_SIZE, "radv-%s", RADV_TIMESTAMP);
}
void radv_GetPhysicalDeviceProperties(
@@ -544,20 +374,6 @@ void radv_GetPhysicalDeviceProperties(
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
VkSampleCountFlags sample_counts = 0xf;
-
- /* make sure that the entire descriptor set is addressable with a signed
- * 32-bit int. So the sum of all limits scaled by descriptor size has to
- * be at most 2 GiB. the combined image & samples object count as one of
- * both. This limit is for the pipeline layout, not for the set layout, but
- * there is no set limit, so we just set a pipeline limit. I don't think
- * any app is going to hit this soon. */
- size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
- (32 /* uniform buffer, 32 due to potential space wasted on alignement */ +
- 32 /* storage buffer, 32 due to potential space wasted on alignement */ +
- 32 /* sampler, largest when combined with image */ +
- 64 /* sampled image */ +
- 64 /* storage image */);
-
VkPhysicalDeviceLimits limits = {
.maxImageDimension1D = (1 << 14),
.maxImageDimension2D = (1 << 14),
@@ -571,52 +387,52 @@ void radv_GetPhysicalDeviceProperties(
.maxMemoryAllocationCount = UINT32_MAX,
.maxSamplerAllocationCount = 64 * 1024,
.bufferImageGranularity = 64, /* A cache line */
- .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
+ .sparseAddressSpaceSize = 0,
.maxBoundDescriptorSets = MAX_SETS,
- .maxPerStageDescriptorSamplers = max_descriptor_set_size,
- .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
- .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
- .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
- .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
- .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
- .maxPerStageResources = max_descriptor_set_size,
- .maxDescriptorSetSamplers = max_descriptor_set_size,
- .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
- .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
- .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
- .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
- .maxDescriptorSetSampledImages = max_descriptor_set_size,
- .maxDescriptorSetStorageImages = max_descriptor_set_size,
- .maxDescriptorSetInputAttachments = max_descriptor_set_size,
+ .maxPerStageDescriptorSamplers = 64,
+ .maxPerStageDescriptorUniformBuffers = 64,
+ .maxPerStageDescriptorStorageBuffers = 64,
+ .maxPerStageDescriptorSampledImages = 64,
+ .maxPerStageDescriptorStorageImages = 64,
+ .maxPerStageDescriptorInputAttachments = 64,
+ .maxPerStageResources = 128,
+ .maxDescriptorSetSamplers = 256,
+ .maxDescriptorSetUniformBuffers = 256,
+ .maxDescriptorSetUniformBuffersDynamic = 256,
+ .maxDescriptorSetStorageBuffers = 256,
+ .maxDescriptorSetStorageBuffersDynamic = 256,
+ .maxDescriptorSetSampledImages = 256,
+ .maxDescriptorSetStorageImages = 256,
+ .maxDescriptorSetInputAttachments = 256,
.maxVertexInputAttributes = 32,
.maxVertexInputBindings = 32,
.maxVertexInputAttributeOffset = 2047,
.maxVertexInputBindingStride = 2048,
.maxVertexOutputComponents = 128,
- .maxTessellationGenerationLevel = 64,
- .maxTessellationPatchSize = 32,
- .maxTessellationControlPerVertexInputComponents = 128,
- .maxTessellationControlPerVertexOutputComponents = 128,
- .maxTessellationControlPerPatchOutputComponents = 120,
- .maxTessellationControlTotalOutputComponents = 4096,
- .maxTessellationEvaluationInputComponents = 128,
- .maxTessellationEvaluationOutputComponents = 128,
- .maxGeometryShaderInvocations = 127,
+ .maxTessellationGenerationLevel = 0,
+ .maxTessellationPatchSize = 0,
+ .maxTessellationControlPerVertexInputComponents = 0,
+ .maxTessellationControlPerVertexOutputComponents = 0,
+ .maxTessellationControlPerPatchOutputComponents = 0,
+ .maxTessellationControlTotalOutputComponents = 0,
+ .maxTessellationEvaluationInputComponents = 0,
+ .maxTessellationEvaluationOutputComponents = 0,
+ .maxGeometryShaderInvocations = 32,
.maxGeometryInputComponents = 64,
.maxGeometryOutputComponents = 128,
.maxGeometryOutputVertices = 256,
.maxGeometryTotalOutputComponents = 1024,
.maxFragmentInputComponents = 128,
.maxFragmentOutputAttachments = 8,
- .maxFragmentDualSrcAttachments = 1,
+ .maxFragmentDualSrcAttachments = 2,
.maxFragmentCombinedOutputResources = 8,
.maxComputeSharedMemorySize = 32768,
.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
- .maxComputeWorkGroupInvocations = 2048,
+ .maxComputeWorkGroupInvocations = 16 * 1024,
.maxComputeWorkGroupSize = {
- 2048,
- 2048,
- 2048
+ 16 * 1024/*devinfo->max_cs_threads*/,
+ 16 * 1024,
+ 16 * 1024
},
.subPixelPrecisionBits = 4 /* FIXME */,
.subTexelPrecisionBits = 4 /* FIXME */,
@@ -633,13 +449,13 @@ void radv_GetPhysicalDeviceProperties(
.minTexelBufferOffsetAlignment = 1,
.minUniformBufferOffsetAlignment = 4,
.minStorageBufferOffsetAlignment = 4,
- .minTexelOffset = -32,
- .maxTexelOffset = 31,
- .minTexelGatherOffset = -32,
- .maxTexelGatherOffset = 31,
- .minInterpolationOffset = -2,
- .maxInterpolationOffset = 2,
- .subPixelInterpolationOffsetBits = 8,
+ .minTexelOffset = -8,
+ .maxTexelOffset = 7,
+ .minTexelGatherOffset = -8,
+ .maxTexelGatherOffset = 7,
+ .minInterpolationOffset = 0, /* FIXME */
+ .maxInterpolationOffset = 0, /* FIXME */
+ .subPixelInterpolationOffsetBits = 0, /* FIXME */
.maxFramebufferWidth = (1 << 14),
.maxFramebufferHeight = (1 << 14),
.maxFramebufferLayers = (1 << 10),
@@ -655,7 +471,7 @@ void radv_GetPhysicalDeviceProperties(
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.maxSampleMaskWords = 1,
.timestampComputeAndGraphics = false,
- .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
+ .timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
.maxClipDistances = 8,
.maxCullDistances = 8,
.maxCombinedClipAndCullDistances = 8,
@@ -672,89 +488,17 @@ void radv_GetPhysicalDeviceProperties(
};
*pProperties = (VkPhysicalDeviceProperties) {
- .apiVersion = VK_MAKE_VERSION(1, 0, 42),
- .driverVersion = radv_get_driver_version(),
+ .apiVersion = VK_MAKE_VERSION(1, 0, 5),
+ .driverVersion = 1,
.vendorID = 0x1002,
.deviceID = pdevice->rad_info.pci_id,
- .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
+ .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
.limits = limits,
- .sparseProperties = {0},
+ .sparseProperties = {0}, /* Sparse resources are not supported. */
};
strcpy(pProperties->deviceName, pdevice->name);
- memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
-}
-
-void radv_GetPhysicalDeviceProperties2KHR(
- VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceProperties2KHR *pProperties)
-{
- radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
-
- vk_foreach_struct(ext, pProperties->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
- VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
- (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
- properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
- break;
- }
- default:
- break;
- }
- }
-}
-
-static void radv_get_physical_device_queue_family_properties(
- struct radv_physical_device* pdevice,
- uint32_t* pCount,
- VkQueueFamilyProperties** pQueueFamilyProperties)
-{
- int num_queue_families = 1;
- int idx;
- if (pdevice->rad_info.compute_rings > 0 &&
- pdevice->rad_info.chip_class >= CIK &&
- !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
- num_queue_families++;
-
- if (pQueueFamilyProperties == NULL) {
- *pCount = num_queue_families;
- return;
- }
-
- if (!*pCount)
- return;
-
- idx = 0;
- if (*pCount >= 1) {
- *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
- .queueFlags = VK_QUEUE_GRAPHICS_BIT |
- VK_QUEUE_COMPUTE_BIT |
- VK_QUEUE_TRANSFER_BIT |
- VK_QUEUE_SPARSE_BINDING_BIT,
- .queueCount = 1,
- .timestampValidBits = 64,
- .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
- };
- idx++;
- }
-
- if (pdevice->rad_info.compute_rings > 0 &&
- pdevice->rad_info.chip_class >= CIK &&
- !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
- if (*pCount > idx) {
- *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
- .queueFlags = VK_QUEUE_COMPUTE_BIT |
- VK_QUEUE_TRANSFER_BIT |
- VK_QUEUE_SPARSE_BINDING_BIT,
- .queueCount = pdevice->rad_info.compute_rings,
- .timestampValidBits = 64,
- .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
- };
- idx++;
- }
- }
- *pCount = idx;
+ radv_device_get_cache_uuid(pProperties->pipelineCacheUUID);
}
void radv_GetPhysicalDeviceQueueFamilyProperties(
@@ -762,110 +506,62 @@ void radv_GetPhysicalDeviceQueueFamilyProperties(
uint32_t* pCount,
VkQueueFamilyProperties* pQueueFamilyProperties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- if (!pQueueFamilyProperties) {
- return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
+ if (pQueueFamilyProperties == NULL) {
+ *pCount = 1;
return;
}
- VkQueueFamilyProperties *properties[] = {
- pQueueFamilyProperties + 0,
- pQueueFamilyProperties + 1,
- pQueueFamilyProperties + 2,
- };
- radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
- assert(*pCount <= 3);
-}
+ assert(*pCount >= 1);
-void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
- VkPhysicalDevice physicalDevice,
- uint32_t* pCount,
- VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- if (!pQueueFamilyProperties) {
- return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
- return;
- }
- VkQueueFamilyProperties *properties[] = {
- &pQueueFamilyProperties[0].queueFamilyProperties,
- &pQueueFamilyProperties[1].queueFamilyProperties,
- &pQueueFamilyProperties[2].queueFamilyProperties,
+ *pQueueFamilyProperties = (VkQueueFamilyProperties) {
+ .queueFlags = VK_QUEUE_GRAPHICS_BIT |
+ VK_QUEUE_COMPUTE_BIT |
+ VK_QUEUE_TRANSFER_BIT,
+ .queueCount = 1,
+ .timestampValidBits = 64,
+ .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
};
- radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
- assert(*pCount <= 3);
}
void radv_GetPhysicalDeviceMemoryProperties(
VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceMemoryProperties *pMemoryProperties)
+ VkPhysicalDeviceMemoryProperties* pMemoryProperties)
{
RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
- STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
-
- pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
- pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
+ pMemoryProperties->memoryTypeCount = 3;
+ pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
- .heapIndex = RADV_MEM_HEAP_VRAM,
- };
- pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
- .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
- .heapIndex = RADV_MEM_HEAP_GTT,
+ .heapIndex = 0,
};
- pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
+ pMemoryProperties->memoryTypes[1] = (VkMemoryType) {
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
- .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
+ .heapIndex = 0,
};
- pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
- .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ pMemoryProperties->memoryTypes[2] = (VkMemoryType) {
+ .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
- .heapIndex = RADV_MEM_HEAP_GTT,
+ .heapIndex = 1,
};
- STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
- uint64_t visible_vram_size = MIN2(physical_device->rad_info.vram_size,
- physical_device->rad_info.visible_vram_size);
-
- pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
- pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
- .size = physical_device->rad_info.vram_size -
- visible_vram_size,
- .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
- };
- pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
- .size = visible_vram_size,
+ pMemoryProperties->memoryHeapCount = 2;
+ pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
+ .size = physical_device->rad_info.vram_size,
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
};
- pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
+ pMemoryProperties->memoryHeaps[1] = (VkMemoryHeap) {
.size = physical_device->rad_info.gart_size,
.flags = 0,
};
}
-void radv_GetPhysicalDeviceMemoryProperties2KHR(
- VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
-{
- return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
- &pMemoryProperties->memoryProperties);
-}
-
-static int
-radv_queue_init(struct radv_device *device, struct radv_queue *queue,
- int queue_family_index, int idx)
+static VkResult
+radv_queue_init(struct radv_device *device, struct radv_queue *queue)
{
queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
queue->device = device;
- queue->queue_family_index = queue_family_index;
- queue->queue_idx = idx;
-
- queue->hw_ctx = device->ws->ctx_create(device->ws);
- if (!queue->hw_ctx)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
return VK_SUCCESS;
}
@@ -873,58 +569,6 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue,
static void
radv_queue_finish(struct radv_queue *queue)
{
- if (queue->hw_ctx)
- queue->device->ws->ctx_destroy(queue->hw_ctx);
-
- if (queue->initial_preamble_cs)
- queue->device->ws->cs_destroy(queue->initial_preamble_cs);
- if (queue->continue_preamble_cs)
- queue->device->ws->cs_destroy(queue->continue_preamble_cs);
- if (queue->descriptor_bo)
- queue->device->ws->buffer_destroy(queue->descriptor_bo);
- if (queue->scratch_bo)
- queue->device->ws->buffer_destroy(queue->scratch_bo);
- if (queue->esgs_ring_bo)
- queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
- if (queue->gsvs_ring_bo)
- queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
- if (queue->tess_factor_ring_bo)
- queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
- if (queue->tess_offchip_ring_bo)
- queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
- if (queue->compute_scratch_bo)
- queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
-}
-
-static void
-radv_device_init_gs_info(struct radv_device *device)
-{
- switch (device->physical_device->rad_info.family) {
- case CHIP_OLAND:
- case CHIP_HAINAN:
- case CHIP_KAVERI:
- case CHIP_KABINI:
- case CHIP_MULLINS:
- case CHIP_ICELAND:
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- device->gs_table_depth = 16;
- return;
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- device->gs_table_depth = 32;
- return;
- default:
- unreachable("unknown GPU");
- }
}
VkResult radv_CreateDevice(
@@ -938,9 +582,15 @@ VkResult radv_CreateDevice(
struct radv_device *device;
for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
- if (!is_extension_enabled(physical_device->extensions.ext_array,
- physical_device->extensions.num_ext,
- pCreateInfo->ppEnabledExtensionNames[i]))
+ bool found = false;
+ for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) {
+ if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
+ device_extensions[j].extensionName) == 0) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
}
@@ -950,13 +600,8 @@ VkResult radv_CreateDevice(
if (!device)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- memset(device, 0, sizeof(*device));
-
device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
device->instance = physical_device->instance;
- device->physical_device = physical_device;
-
- device->debug_flags = device->instance->debug_flags;
device->ws = physical_device->ws;
if (pAllocator)
@@ -964,156 +609,34 @@ VkResult radv_CreateDevice(
else
device->alloc = physical_device->instance->alloc;
- for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
- const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
- uint32_t qfi = queue_create->queueFamilyIndex;
-
- device->queues[qfi] = vk_alloc(&device->alloc,
- queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
- if (!device->queues[qfi]) {
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto fail;
- }
-
- memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
-
- device->queue_count[qfi] = queue_create->queueCount;
-
- for (unsigned q = 0; q < queue_create->queueCount; q++) {
- result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
- if (result != VK_SUCCESS)
- goto fail;
- }
+ device->hw_ctx = device->ws->ctx_create(device->ws);
+ if (!device->hw_ctx) {
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail_free;
}
-#if HAVE_LLVM < 0x0400
- device->llvm_supports_spill = false;
-#else
- device->llvm_supports_spill = true;
-#endif
-
- /* The maximum number of scratch waves. Scratch space isn't divided
- * evenly between CUs. The number is only a function of the number of CUs.
- * We can decrease the constant to decrease the scratch buffer size.
- *
- * sctx->scratch_waves must be >= the maximum posible size of
- * 1 threadgroup, so that the hw doesn't hang from being unable
- * to start any.
- *
- * The recommended value is 4 per CU at most. Higher numbers don't
- * bring much benefit, but they still occupy chip resources (think
- * async compute). I've seen ~2% performance difference between 4 and 32.
- */
- uint32_t max_threads_per_block = 2048;
- device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
- max_threads_per_block / 64);
-
- radv_device_init_gs_info(device);
-
- device->tess_offchip_block_dw_size =
- device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
- device->has_distributed_tess =
- device->physical_device->rad_info.chip_class >= VI &&
- device->physical_device->rad_info.max_se >= 2;
+ radv_queue_init(device, &device->queue);
result = radv_device_init_meta(device);
- if (result != VK_SUCCESS)
- goto fail;
-
- radv_device_init_msaa(device);
-
- for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
- device->empty_cs[family] = device->ws->cs_create(device->ws, family);
- switch (family) {
- case RADV_QUEUE_GENERAL:
- radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
- radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
- radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
- break;
- case RADV_QUEUE_COMPUTE:
- radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
- radeon_emit(device->empty_cs[family], 0);
- break;
- }
- device->ws->cs_finalize(device->empty_cs[family]);
-
- device->flush_cs[family] = device->ws->cs_create(device->ws, family);
- switch (family) {
- case RADV_QUEUE_GENERAL:
- case RADV_QUEUE_COMPUTE:
- si_cs_emit_cache_flush(device->flush_cs[family],
- device->physical_device->rad_info.chip_class,
- family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
- RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_INV_SMEM_L1 |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_GLOBAL_L2);
- break;
- }
- device->ws->cs_finalize(device->flush_cs[family]);
-
- device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
- switch (family) {
- case RADV_QUEUE_GENERAL:
- case RADV_QUEUE_COMPUTE:
- si_cs_emit_cache_flush(device->flush_shader_cs[family],
- device->physical_device->rad_info.chip_class,
- family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
- family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) |
- RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_INV_SMEM_L1 |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_GLOBAL_L2);
- break;
- }
- device->ws->cs_finalize(device->flush_shader_cs[family]);
- }
-
- if (getenv("RADV_TRACE_FILE")) {
- device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
- RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
- if (!device->trace_bo)
- goto fail;
-
- device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
- if (!device->trace_id_ptr)
- goto fail;
+ if (result != VK_SUCCESS) {
+ device->ws->ctx_destroy(device->hw_ctx);
+ goto fail_free;
}
+ device->allow_fast_clears = env_var_as_boolean("RADV_FAST_CLEARS", false);
+ device->allow_dcc = !env_var_as_boolean("RADV_DCC_DISABLE", false);
- if (device->physical_device->rad_info.chip_class >= CIK)
- cik_create_gfx_config(device);
-
- VkPipelineCacheCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
- ci.pNext = NULL;
- ci.flags = 0;
- ci.pInitialData = NULL;
- ci.initialDataSize = 0;
- VkPipelineCache pc;
- result = radv_CreatePipelineCache(radv_device_to_handle(device),
- &ci, NULL, &pc);
- if (result != VK_SUCCESS)
- goto fail;
-
- device->mem_cache = radv_pipeline_cache_from_handle(pc);
+ if (device->allow_fast_clears && device->allow_dcc)
+ radv_finishme("DCC fast clears have not been tested\n");
+ radv_device_init_msaa(device);
+ device->empty_cs = device->ws->cs_create(device->ws, RING_GFX);
+ radeon_emit(device->empty_cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+ radeon_emit(device->empty_cs, CONTEXT_CONTROL_LOAD_ENABLE(1));
+ radeon_emit(device->empty_cs, CONTEXT_CONTROL_SHADOW_ENABLE(1));
+ device->ws->cs_finalize(device->empty_cs);
*pDevice = radv_device_to_handle(device);
return VK_SUCCESS;
-
-fail:
- if (device->trace_bo)
- device->ws->buffer_destroy(device->trace_bo);
-
- if (device->gfx_init)
- device->ws->buffer_destroy(device->gfx_init);
-
- for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
- for (unsigned q = 0; q < device->queue_count[i]; q++)
- radv_queue_finish(&device->queues[i][q]);
- if (device->queue_count[i])
- vk_free(&device->alloc, device->queues[i]);
- }
-
+fail_free:
vk_free(&device->alloc, device);
return result;
}
@@ -1124,32 +647,10 @@ void radv_DestroyDevice(
{
RADV_FROM_HANDLE(radv_device, device, _device);
- if (!device)
- return;
-
- if (device->trace_bo)
- device->ws->buffer_destroy(device->trace_bo);
-
- if (device->gfx_init)
- device->ws->buffer_destroy(device->gfx_init);
-
- for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
- for (unsigned q = 0; q < device->queue_count[i]; q++)
- radv_queue_finish(&device->queues[i][q]);
- if (device->queue_count[i])
- vk_free(&device->alloc, device->queues[i]);
- if (device->empty_cs[i])
- device->ws->cs_destroy(device->empty_cs[i]);
- if (device->flush_cs[i])
- device->ws->cs_destroy(device->flush_cs[i]);
- if (device->flush_shader_cs[i])
- device->ws->cs_destroy(device->flush_shader_cs[i]);
- }
+ device->ws->ctx_destroy(device->hw_ctx);
+ radv_queue_finish(&device->queue);
radv_device_finish_meta(device);
- VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
- radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
-
vk_free(&device->alloc, device);
}
@@ -1159,14 +660,14 @@ VkResult radv_EnumerateInstanceExtensionProperties(
VkExtensionProperties* pProperties)
{
if (pProperties == NULL) {
- *pPropertyCount = ARRAY_SIZE(instance_extensions);
+ *pPropertyCount = ARRAY_SIZE(global_extensions);
return VK_SUCCESS;
}
- *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
- typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
+ *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(global_extensions));
+ typed_memcpy(pProperties, global_extensions, *pPropertyCount);
- if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
+ if (*pPropertyCount < ARRAY_SIZE(global_extensions))
return VK_INCOMPLETE;
return VK_SUCCESS;
@@ -1178,17 +679,15 @@ VkResult radv_EnumerateDeviceExtensionProperties(
uint32_t* pPropertyCount,
VkExtensionProperties* pProperties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
-
if (pProperties == NULL) {
- *pPropertyCount = pdevice->extensions.num_ext;
+ *pPropertyCount = ARRAY_SIZE(device_extensions);
return VK_SUCCESS;
}
- *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
- typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
+ *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(device_extensions));
+ typed_memcpy(pProperties, device_extensions, *pPropertyCount);
- if (*pPropertyCount < pdevice->extensions.num_ext)
+ if (*pPropertyCount < ARRAY_SIZE(device_extensions))
return VK_INCOMPLETE;
return VK_SUCCESS;
@@ -1223,579 +722,15 @@ VkResult radv_EnumerateDeviceLayerProperties(
void radv_GetDeviceQueue(
VkDevice _device,
- uint32_t queueFamilyIndex,
+ uint32_t queueNodeIndex,
uint32_t queueIndex,
VkQueue* pQueue)
{
RADV_FROM_HANDLE(radv_device, device, _device);
- *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
-}
-
-static void radv_dump_trace(struct radv_device *device,
- struct radeon_winsys_cs *cs)
-{
- const char *filename = getenv("RADV_TRACE_FILE");
- FILE *f = fopen(filename, "w");
- if (!f) {
- fprintf(stderr, "Failed to write trace dump to %s\n", filename);
- return;
- }
-
- fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
- device->ws->cs_dump(cs, f, *device->trace_id_ptr);
- fclose(f);
-}
-
-static void
-fill_geom_tess_rings(struct radv_queue *queue,
- uint32_t *map,
- bool add_sample_positions,
- uint32_t esgs_ring_size,
- struct radeon_winsys_bo *esgs_ring_bo,
- uint32_t gsvs_ring_size,
- struct radeon_winsys_bo *gsvs_ring_bo,
- uint32_t tess_factor_ring_size,
- struct radeon_winsys_bo *tess_factor_ring_bo,
- uint32_t tess_offchip_ring_size,
- struct radeon_winsys_bo *tess_offchip_ring_bo)
-{
- uint64_t esgs_va = 0, gsvs_va = 0;
- uint64_t tess_factor_va = 0, tess_offchip_va = 0;
- uint32_t *desc = &map[4];
-
- if (esgs_ring_bo)
- esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
- if (gsvs_ring_bo)
- gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
- if (tess_factor_ring_bo)
- tess_factor_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
- if (tess_offchip_ring_bo)
- tess_offchip_va = queue->device->ws->buffer_get_va(tess_offchip_ring_bo);
-
- /* stride 0, num records - size, add tid, swizzle, elsize4,
- index stride 64 */
- desc[0] = esgs_va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
- S_008F04_STRIDE(0) |
- S_008F04_SWIZZLE_ENABLE(true);
- desc[2] = esgs_ring_size;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
- S_008F0C_ELEMENT_SIZE(1) |
- S_008F0C_INDEX_STRIDE(3) |
- S_008F0C_ADD_TID_ENABLE(true);
-
- desc += 4;
- /* GS entry for ES->GS ring */
- /* stride 0, num records - size, elsize0,
- index stride 0 */
- desc[0] = esgs_va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
- S_008F04_STRIDE(0) |
- S_008F04_SWIZZLE_ENABLE(false);
- desc[2] = esgs_ring_size;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
- S_008F0C_ELEMENT_SIZE(0) |
- S_008F0C_INDEX_STRIDE(0) |
- S_008F0C_ADD_TID_ENABLE(false);
-
- desc += 4;
- /* VS entry for GS->VS ring */
- /* stride 0, num records - size, elsize0,
- index stride 0 */
- desc[0] = gsvs_va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
- S_008F04_STRIDE(0) |
- S_008F04_SWIZZLE_ENABLE(false);
- desc[2] = gsvs_ring_size;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
- S_008F0C_ELEMENT_SIZE(0) |
- S_008F0C_INDEX_STRIDE(0) |
- S_008F0C_ADD_TID_ENABLE(false);
- desc += 4;
-
- /* stride gsvs_itemsize, num records 64
- elsize 4, index stride 16 */
- /* shader will patch stride and desc[2] */
- desc[0] = gsvs_va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
- S_008F04_STRIDE(0) |
- S_008F04_SWIZZLE_ENABLE(true);
- desc[2] = 0;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
- S_008F0C_ELEMENT_SIZE(1) |
- S_008F0C_INDEX_STRIDE(1) |
- S_008F0C_ADD_TID_ENABLE(true);
- desc += 4;
-
- desc[0] = tess_factor_va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
- S_008F04_STRIDE(0) |
- S_008F04_SWIZZLE_ENABLE(false);
- desc[2] = tess_factor_ring_size;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
- S_008F0C_ELEMENT_SIZE(0) |
- S_008F0C_INDEX_STRIDE(0) |
- S_008F0C_ADD_TID_ENABLE(false);
- desc += 4;
-
- desc[0] = tess_offchip_va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
- S_008F04_STRIDE(0) |
- S_008F04_SWIZZLE_ENABLE(false);
- desc[2] = tess_offchip_ring_size;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
- S_008F0C_ELEMENT_SIZE(0) |
- S_008F0C_INDEX_STRIDE(0) |
- S_008F0C_ADD_TID_ENABLE(false);
- desc += 4;
-
- /* add sample positions after all rings */
- memcpy(desc, queue->device->sample_locations_1x, 8);
- desc += 2;
- memcpy(desc, queue->device->sample_locations_2x, 16);
- desc += 4;
- memcpy(desc, queue->device->sample_locations_4x, 32);
- desc += 8;
- memcpy(desc, queue->device->sample_locations_8x, 64);
- desc += 16;
- memcpy(desc, queue->device->sample_locations_16x, 128);
-}
-
-static unsigned
-radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
-{
- bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
- device->physical_device->rad_info.family != CHIP_CARRIZO &&
- device->physical_device->rad_info.family != CHIP_STONEY;
- unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
- unsigned max_offchip_buffers = max_offchip_buffers_per_se *
- device->physical_device->rad_info.max_se;
- unsigned offchip_granularity;
- unsigned hs_offchip_param;
- switch (device->tess_offchip_block_dw_size) {
- default:
- assert(0);
- /* fall through */
- case 8192:
- offchip_granularity = V_03093C_X_8K_DWORDS;
- break;
- case 4096:
- offchip_granularity = V_03093C_X_4K_DWORDS;
- break;
- }
-
- switch (device->physical_device->rad_info.chip_class) {
- case SI:
- max_offchip_buffers = MIN2(max_offchip_buffers, 126);
- break;
- case CIK:
- max_offchip_buffers = MIN2(max_offchip_buffers, 508);
- break;
- case VI:
- default:
- max_offchip_buffers = MIN2(max_offchip_buffers, 512);
- break;
- }
-
- *max_offchip_buffers_p = max_offchip_buffers;
- if (device->physical_device->rad_info.chip_class >= CIK) {
- if (device->physical_device->rad_info.chip_class >= VI)
- --max_offchip_buffers;
- hs_offchip_param =
- S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
- S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
- } else {
- hs_offchip_param =
- S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
- }
- return hs_offchip_param;
-}
-
-static VkResult
-radv_get_preamble_cs(struct radv_queue *queue,
- uint32_t scratch_size,
- uint32_t compute_scratch_size,
- uint32_t esgs_ring_size,
- uint32_t gsvs_ring_size,
- bool needs_tess_rings,
- bool needs_sample_positions,
- struct radeon_winsys_cs **initial_preamble_cs,
- struct radeon_winsys_cs **continue_preamble_cs)
-{
- struct radeon_winsys_bo *scratch_bo = NULL;
- struct radeon_winsys_bo *descriptor_bo = NULL;
- struct radeon_winsys_bo *compute_scratch_bo = NULL;
- struct radeon_winsys_bo *esgs_ring_bo = NULL;
- struct radeon_winsys_bo *gsvs_ring_bo = NULL;
- struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
- struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
- struct radeon_winsys_cs *dest_cs[2] = {0};
- bool add_tess_rings = false, add_sample_positions = false;
- unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
- unsigned max_offchip_buffers;
- unsigned hs_offchip_param = 0;
- if (!queue->has_tess_rings) {
- if (needs_tess_rings)
- add_tess_rings = true;
- }
- if (!queue->has_sample_positions) {
- if (needs_sample_positions)
- add_sample_positions = true;
- }
- tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
- hs_offchip_param = radv_get_hs_offchip_param(queue->device,
- &max_offchip_buffers);
- tess_offchip_ring_size = max_offchip_buffers *
- queue->device->tess_offchip_block_dw_size * 4;
-
- if (scratch_size <= queue->scratch_size &&
- compute_scratch_size <= queue->compute_scratch_size &&
- esgs_ring_size <= queue->esgs_ring_size &&
- gsvs_ring_size <= queue->gsvs_ring_size &&
- !add_tess_rings && !add_sample_positions &&
- queue->initial_preamble_cs) {
- *initial_preamble_cs = queue->initial_preamble_cs;
- *continue_preamble_cs = queue->continue_preamble_cs;
- if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
- *continue_preamble_cs = NULL;
- return VK_SUCCESS;
- }
-
- if (scratch_size > queue->scratch_size) {
- scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
- scratch_size,
- 4096,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_CPU_ACCESS);
- if (!scratch_bo)
- goto fail;
- } else
- scratch_bo = queue->scratch_bo;
-
- if (compute_scratch_size > queue->compute_scratch_size) {
- compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
- compute_scratch_size,
- 4096,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_CPU_ACCESS);
- if (!compute_scratch_bo)
- goto fail;
-
- } else
- compute_scratch_bo = queue->compute_scratch_bo;
-
- if (esgs_ring_size > queue->esgs_ring_size) {
- esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
- esgs_ring_size,
- 4096,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_CPU_ACCESS);
- if (!esgs_ring_bo)
- goto fail;
- } else {
- esgs_ring_bo = queue->esgs_ring_bo;
- esgs_ring_size = queue->esgs_ring_size;
- }
-
- if (gsvs_ring_size > queue->gsvs_ring_size) {
- gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
- gsvs_ring_size,
- 4096,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_CPU_ACCESS);
- if (!gsvs_ring_bo)
- goto fail;
- } else {
- gsvs_ring_bo = queue->gsvs_ring_bo;
- gsvs_ring_size = queue->gsvs_ring_size;
- }
-
- if (add_tess_rings) {
- tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
- tess_factor_ring_size,
- 256,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_CPU_ACCESS);
- if (!tess_factor_ring_bo)
- goto fail;
- tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
- tess_offchip_ring_size,
- 256,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_CPU_ACCESS);
- if (!tess_offchip_ring_bo)
- goto fail;
- } else {
- tess_factor_ring_bo = queue->tess_factor_ring_bo;
- tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
- }
-
- if (scratch_bo != queue->scratch_bo ||
- esgs_ring_bo != queue->esgs_ring_bo ||
- gsvs_ring_bo != queue->gsvs_ring_bo ||
- tess_factor_ring_bo != queue->tess_factor_ring_bo ||
- tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
- uint32_t size = 0;
- if (gsvs_ring_bo || esgs_ring_bo ||
- tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
- size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
- if (add_sample_positions)
- size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. */
- }
- else if (scratch_bo)
- size = 8; /* 2 dword */
-
- descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
- size,
- 4096,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS);
- if (!descriptor_bo)
- goto fail;
- } else
- descriptor_bo = queue->descriptor_bo;
-
- for(int i = 0; i < 2; ++i) {
- struct radeon_winsys_cs *cs = NULL;
- cs = queue->device->ws->cs_create(queue->device->ws,
- queue->queue_family_index ? RING_COMPUTE : RING_GFX);
- if (!cs)
- goto fail;
-
- dest_cs[i] = cs;
-
- if (scratch_bo)
- queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
-
- if (esgs_ring_bo)
- queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
-
- if (gsvs_ring_bo)
- queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
-
- if (tess_factor_ring_bo)
- queue->device->ws->cs_add_buffer(cs, tess_factor_ring_bo, 8);
-
- if (tess_offchip_ring_bo)
- queue->device->ws->cs_add_buffer(cs, tess_offchip_ring_bo, 8);
-
- if (descriptor_bo)
- queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
-
- if (descriptor_bo != queue->descriptor_bo) {
- uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
-
- if (scratch_bo) {
- uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
- uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
- S_008F04_SWIZZLE_ENABLE(1);
- map[0] = scratch_va;
- map[1] = rsrc1;
- }
-
- if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
- add_sample_positions)
- fill_geom_tess_rings(queue, map, add_sample_positions,
- esgs_ring_size, esgs_ring_bo,
- gsvs_ring_size, gsvs_ring_bo,
- tess_factor_ring_size, tess_factor_ring_bo,
- tess_offchip_ring_size, tess_offchip_ring_bo);
-
- queue->device->ws->buffer_unmap(descriptor_bo);
- }
-
- if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
- }
-
- if (esgs_ring_bo || gsvs_ring_bo) {
- if (queue->device->physical_device->rad_info.chip_class >= CIK) {
- radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
- radeon_emit(cs, esgs_ring_size >> 8);
- radeon_emit(cs, gsvs_ring_size >> 8);
- } else {
- radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
- radeon_emit(cs, esgs_ring_size >> 8);
- radeon_emit(cs, gsvs_ring_size >> 8);
- }
- }
-
- if (tess_factor_ring_bo) {
- uint64_t tf_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
- if (queue->device->physical_device->rad_info.chip_class >= CIK) {
- radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
- S_030938_SIZE(tess_factor_ring_size / 4));
- radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
- tf_va >> 8);
- radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
- } else {
- radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
- S_008988_SIZE(tess_factor_ring_size / 4));
- radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
- tf_va >> 8);
- radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
- hs_offchip_param);
- }
- }
-
- if (descriptor_bo) {
- uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
- R_00B130_SPI_SHADER_USER_DATA_VS_0,
- R_00B230_SPI_SHADER_USER_DATA_GS_0,
- R_00B330_SPI_SHADER_USER_DATA_ES_0,
- R_00B430_SPI_SHADER_USER_DATA_HS_0,
- R_00B530_SPI_SHADER_USER_DATA_LS_0};
-
- uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
-
- for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
- radeon_set_sh_reg_seq(cs, regs[i], 2);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- }
- }
-
- if (compute_scratch_bo) {
- uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
- uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
- S_008F04_SWIZZLE_ENABLE(1);
-
- queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
-
- radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
- radeon_emit(cs, scratch_va);
- radeon_emit(cs, rsrc1);
- }
-
- if (!i) {
- si_cs_emit_cache_flush(cs,
- queue->device->physical_device->rad_info.chip_class,
- queue->queue_family_index == RING_COMPUTE &&
- queue->device->physical_device->rad_info.chip_class >= CIK,
- RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_INV_SMEM_L1 |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_GLOBAL_L2);
- }
-
- if (!queue->device->ws->cs_finalize(cs))
- goto fail;
- }
-
- if (queue->initial_preamble_cs)
- queue->device->ws->cs_destroy(queue->initial_preamble_cs);
-
- if (queue->continue_preamble_cs)
- queue->device->ws->cs_destroy(queue->continue_preamble_cs);
-
- queue->initial_preamble_cs = dest_cs[0];
- queue->continue_preamble_cs = dest_cs[1];
-
- if (scratch_bo != queue->scratch_bo) {
- if (queue->scratch_bo)
- queue->device->ws->buffer_destroy(queue->scratch_bo);
- queue->scratch_bo = scratch_bo;
- queue->scratch_size = scratch_size;
- }
-
- if (compute_scratch_bo != queue->compute_scratch_bo) {
- if (queue->compute_scratch_bo)
- queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
- queue->compute_scratch_bo = compute_scratch_bo;
- queue->compute_scratch_size = compute_scratch_size;
- }
-
- if (esgs_ring_bo != queue->esgs_ring_bo) {
- if (queue->esgs_ring_bo)
- queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
- queue->esgs_ring_bo = esgs_ring_bo;
- queue->esgs_ring_size = esgs_ring_size;
- }
-
- if (gsvs_ring_bo != queue->gsvs_ring_bo) {
- if (queue->gsvs_ring_bo)
- queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
- queue->gsvs_ring_bo = gsvs_ring_bo;
- queue->gsvs_ring_size = gsvs_ring_size;
- }
-
- if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
- queue->tess_factor_ring_bo = tess_factor_ring_bo;
- }
-
- if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
- queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
- queue->has_tess_rings = true;
- }
-
- if (descriptor_bo != queue->descriptor_bo) {
- if (queue->descriptor_bo)
- queue->device->ws->buffer_destroy(queue->descriptor_bo);
-
- queue->descriptor_bo = descriptor_bo;
- }
-
- if (add_sample_positions)
- queue->has_sample_positions = true;
+ assert(queueIndex == 0);
- *initial_preamble_cs = queue->initial_preamble_cs;
- *continue_preamble_cs = queue->continue_preamble_cs;
- if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
- *continue_preamble_cs = NULL;
- return VK_SUCCESS;
-fail:
- for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
- if (dest_cs[i])
- queue->device->ws->cs_destroy(dest_cs[i]);
- if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
- queue->device->ws->buffer_destroy(descriptor_bo);
- if (scratch_bo && scratch_bo != queue->scratch_bo)
- queue->device->ws->buffer_destroy(scratch_bo);
- if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
- queue->device->ws->buffer_destroy(compute_scratch_bo);
- if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
- queue->device->ws->buffer_destroy(esgs_ring_bo);
- if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
- queue->device->ws->buffer_destroy(gsvs_ring_bo);
- if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
- queue->device->ws->buffer_destroy(tess_factor_ring_bo);
- if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
- queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
- return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ *pQueue = radv_queue_to_handle(&device->queue);
}
VkResult radv_QueueSubmit(
@@ -1807,129 +742,40 @@ VkResult radv_QueueSubmit(
RADV_FROM_HANDLE(radv_queue, queue, _queue);
RADV_FROM_HANDLE(radv_fence, fence, _fence);
struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
- struct radeon_winsys_ctx *ctx = queue->hw_ctx;
+ struct radeon_winsys_ctx *ctx = queue->device->hw_ctx;
int ret;
- uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
- uint32_t scratch_size = 0;
- uint32_t compute_scratch_size = 0;
- uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
- struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
- VkResult result;
- bool fence_emitted = false;
- bool tess_rings_needed = false;
- bool sample_positions_needed = false;
-
- /* Do this first so failing to allocate scratch buffers can't result in
- * partially executed submissions. */
- for (uint32_t i = 0; i < submitCount; i++) {
- for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
- pSubmits[i].pCommandBuffers[j]);
-
- scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
- compute_scratch_size = MAX2(compute_scratch_size,
- cmd_buffer->compute_scratch_size_needed);
- esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
- gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
- tess_rings_needed |= cmd_buffer->tess_rings_needed;
- sample_positions_needed |= cmd_buffer->sample_positions_needed;
- }
- }
-
- result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
- esgs_ring_size, gsvs_ring_size, tess_rings_needed,
- sample_positions_needed,
- &initial_preamble_cs, &continue_preamble_cs);
- if (result != VK_SUCCESS)
- return result;
for (uint32_t i = 0; i < submitCount; i++) {
struct radeon_winsys_cs **cs_array;
- bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
- bool can_patch = !do_flush;
- uint32_t advance;
-
- if (!pSubmits[i].commandBufferCount) {
- if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
- ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
- &queue->device->empty_cs[queue->queue_family_index],
- 1, NULL, NULL,
- (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
- pSubmits[i].waitSemaphoreCount,
- (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
- pSubmits[i].signalSemaphoreCount,
- false, base_fence);
- if (ret) {
- radv_loge("failed to submit CS %d\n", i);
- abort();
- }
- fence_emitted = true;
- }
+ bool can_patch = true;
+
+ if (!pSubmits[i].commandBufferCount)
continue;
- }
cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
- (pSubmits[i].commandBufferCount + do_flush));
-
- if(do_flush)
- cs_array[0] = pSubmits[i].waitSemaphoreCount ?
- queue->device->flush_shader_cs[queue->queue_family_index] :
- queue->device->flush_cs[queue->queue_family_index];
+ pSubmits[i].commandBufferCount);
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
pSubmits[i].pCommandBuffers[j]);
assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
- cs_array[j + do_flush] = cmd_buffer->cs;
+ cs_array[j] = cmd_buffer->cs;
if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
can_patch = false;
}
-
- for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) {
- advance = MIN2(max_cs_submission,
- pSubmits[i].commandBufferCount + do_flush - j);
- bool b = j == 0;
- bool e = j + advance == pSubmits[i].commandBufferCount + do_flush;
-
- if (queue->device->trace_bo)
- *queue->device->trace_id_ptr = 0;
-
- ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
- advance, initial_preamble_cs, continue_preamble_cs,
- (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
- b ? pSubmits[i].waitSemaphoreCount : 0,
- (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
- e ? pSubmits[i].signalSemaphoreCount : 0,
- can_patch, base_fence);
-
- if (ret) {
- radv_loge("failed to submit CS %d\n", i);
- abort();
- }
- fence_emitted = true;
- if (queue->device->trace_bo) {
- bool success = queue->device->ws->ctx_wait_idle(
- queue->hw_ctx,
- radv_queue_family_to_ring(
- queue->queue_family_index),
- queue->queue_idx);
-
- if (!success) { /* Hang */
- radv_dump_trace(queue->device, cs_array[j]);
- abort();
- }
- }
- }
+ ret = queue->device->ws->cs_submit(ctx, cs_array,
+ pSubmits[i].commandBufferCount,
+ can_patch, base_fence);
+ if (ret)
+ radv_loge("failed to submit CS %d\n", i);
free(cs_array);
}
if (fence) {
- if (!fence_emitted)
- ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
- &queue->device->empty_cs[queue->queue_family_index],
- 1, NULL, NULL, NULL, 0, NULL, 0,
- false, base_fence);
+ if (!submitCount)
+ ret = queue->device->ws->cs_submit(ctx, &queue->device->empty_cs,
+ 1, false, base_fence);
fence->submitted = true;
}
@@ -1942,9 +788,7 @@ VkResult radv_QueueWaitIdle(
{
RADV_FROM_HANDLE(radv_queue, queue, _queue);
- queue->device->ws->ctx_wait_idle(queue->hw_ctx,
- radv_queue_family_to_ring(queue->queue_family_index),
- queue->queue_idx);
+ queue->device->ws->ctx_wait_idle(queue->device->hw_ctx);
return VK_SUCCESS;
}
@@ -1953,11 +797,7 @@ VkResult radv_DeviceWaitIdle(
{
RADV_FROM_HANDLE(radv_device, device, _device);
- for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
- for (unsigned q = 0; q < device->queue_count[i]; q++) {
- radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
- }
- }
+ device->ws->ctx_wait_idle(device->hw_ctx);
return VK_SUCCESS;
}
@@ -1991,21 +831,6 @@ PFN_vkVoidFunction radv_GetDeviceProcAddr(
return radv_lookup_entrypoint(pName);
}
-bool radv_get_memory_fd(struct radv_device *device,
- struct radv_device_memory *memory,
- int *pFD)
-{
- struct radeon_bo_metadata metadata;
-
- if (memory->image) {
- radv_init_metadata(device, memory->image, &metadata);
- device->ws->buffer_set_metadata(memory->bo, &metadata);
- }
-
- return device->ws->buffer_get_fd(device->ws, memory->bo,
- pFD);
-}
-
VkResult radv_AllocateMemory(
VkDevice _device,
const VkMemoryAllocateInfo* pAllocateInfo,
@@ -2017,7 +842,6 @@ VkResult radv_AllocateMemory(
VkResult result;
enum radeon_bo_domain domain;
uint32_t flags = 0;
- const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info = NULL;
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
if (pAllocateInfo->allocationSize == 0) {
@@ -2026,45 +850,22 @@ VkResult radv_AllocateMemory(
return VK_SUCCESS;
}
- vk_foreach_struct(ext, pAllocateInfo->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV:
- dedicate_info = (const VkDedicatedAllocationMemoryAllocateInfoNV *)ext;
- break;
- default:
- break;
- }
- }
-
mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (mem == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- if (dedicate_info) {
- mem->image = radv_image_from_handle(dedicate_info->image);
- mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
- } else {
- mem->image = NULL;
- mem->buffer = NULL;
- }
-
uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
- if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
- pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
+ if (pAllocateInfo->memoryTypeIndex == 2)
domain = RADEON_DOMAIN_GTT;
else
domain = RADEON_DOMAIN_VRAM;
- if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
+ if (pAllocateInfo->memoryTypeIndex == 0)
flags |= RADEON_FLAG_NO_CPU_ACCESS;
else
flags |= RADEON_FLAG_CPU_ACCESS;
-
- if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
- flags |= RADEON_FLAG_GTT_WC;
-
- mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
+ mem->bo = device->ws->buffer_create(device->ws, alloc_size, 32768,
domain, flags);
if (!mem->bo) {
@@ -2161,14 +962,19 @@ void radv_GetBufferMemoryRequirements(
{
RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
-
- if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
- pMemoryRequirements->alignment = 4096;
- else
- pMemoryRequirements->alignment = 16;
+ /* The Vulkan spec (git aaed022) says:
+ *
+ * memoryTypeBits is a bitfield and contains one bit set for every
+ * supported memory type for the resource. The bit `1<<i` is set if and
+ * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
+ * structure for the physical device is supported.
+ *
+ * We support three memory types, so bits 0 through 2 are set.
+ */
+ pMemoryRequirements->memoryTypeBits = 0x7;
- pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
+ pMemoryRequirements->size = buffer->size;
+ pMemoryRequirements->alignment = 16;
}
void radv_GetImageMemoryRequirements(
@@ -2178,7 +984,16 @@ void radv_GetImageMemoryRequirements(
{
RADV_FROM_HANDLE(radv_image, image, _image);
- pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
+ /* The Vulkan spec (git aaed022) says:
+ *
+ * memoryTypeBits is a bitfield and contains one bit set for every
+ * supported memory type for the resource. The bit `1<<i` is set if and
+ * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
+ * structure for the physical device is supported.
+ *
+ * We support three memory types, so bits 0 through 2 are set.
+ */
+ pMemoryRequirements->memoryTypeBits = 0x7;
pMemoryRequirements->size = image->size;
pMemoryRequirements->alignment = image->alignment;
@@ -2241,89 +1056,13 @@ VkResult radv_BindImageMemory(
return VK_SUCCESS;
}
-
-static void
-radv_sparse_buffer_bind_memory(struct radv_device *device,
- const VkSparseBufferMemoryBindInfo *bind)
-{
- RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
-
- for (uint32_t i = 0; i < bind->bindCount; ++i) {
- struct radv_device_memory *mem = NULL;
-
- if (bind->pBinds[i].memory != VK_NULL_HANDLE)
- mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
-
- device->ws->buffer_virtual_bind(buffer->bo,
- bind->pBinds[i].resourceOffset,
- bind->pBinds[i].size,
- mem ? mem->bo : NULL,
- bind->pBinds[i].memoryOffset);
- }
-}
-
-static void
-radv_sparse_image_opaque_bind_memory(struct radv_device *device,
- const VkSparseImageOpaqueMemoryBindInfo *bind)
-{
- RADV_FROM_HANDLE(radv_image, image, bind->image);
-
- for (uint32_t i = 0; i < bind->bindCount; ++i) {
- struct radv_device_memory *mem = NULL;
-
- if (bind->pBinds[i].memory != VK_NULL_HANDLE)
- mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
-
- device->ws->buffer_virtual_bind(image->bo,
- bind->pBinds[i].resourceOffset,
- bind->pBinds[i].size,
- mem ? mem->bo : NULL,
- bind->pBinds[i].memoryOffset);
- }
-}
-
- VkResult radv_QueueBindSparse(
- VkQueue _queue,
+VkResult radv_QueueBindSparse(
+ VkQueue queue,
uint32_t bindInfoCount,
const VkBindSparseInfo* pBindInfo,
- VkFence _fence)
+ VkFence fence)
{
- RADV_FROM_HANDLE(radv_fence, fence, _fence);
- RADV_FROM_HANDLE(radv_queue, queue, _queue);
- struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
- bool fence_emitted = false;
-
- for (uint32_t i = 0; i < bindInfoCount; ++i) {
- for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
- radv_sparse_buffer_bind_memory(queue->device,
- pBindInfo[i].pBufferBinds + j);
- }
-
- for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
- radv_sparse_image_opaque_bind_memory(queue->device,
- pBindInfo[i].pImageOpaqueBinds + j);
- }
-
- if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
- queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
- &queue->device->empty_cs[queue->queue_family_index],
- 1, NULL, NULL,
- (struct radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores,
- pBindInfo[i].waitSemaphoreCount,
- (struct radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores,
- pBindInfo[i].signalSemaphoreCount,
- false, base_fence);
- fence_emitted = true;
- if (fence)
- fence->submitted = true;
- }
- }
-
- if (fence && !fence_emitted) {
- fence->signalled = true;
- }
-
- return VK_SUCCESS;
+ stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
}
VkResult radv_CreateFence(
@@ -2344,10 +1083,7 @@ VkResult radv_CreateFence(
fence->submitted = false;
fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
fence->fence = device->ws->create_fence();
- if (!fence->fence) {
- vk_free2(&device->alloc, pAllocator, fence);
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- }
+
*pFence = radv_fence_to_handle(fence);
@@ -2447,33 +1183,25 @@ VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
// Queue semaphore functions
VkResult radv_CreateSemaphore(
- VkDevice _device,
+ VkDevice device,
const VkSemaphoreCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSemaphore* pSemaphore)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radeon_winsys_sem *sem;
+ /* The DRM execbuffer ioctl always executes in-order, even between different
+ * rings. As such, there's nothing to do for the user space semaphore.
+ */
- sem = device->ws->create_sem(device->ws);
- if (!sem)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
+ *pSemaphore = (VkSemaphore)1;
- *pSemaphore = radeon_winsys_sem_to_handle(sem);
return VK_SUCCESS;
}
void radv_DestroySemaphore(
- VkDevice _device,
- VkSemaphore _semaphore,
+ VkDevice device,
+ VkSemaphore semaphore,
const VkAllocationCallbacks* pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore);
- if (!_semaphore)
- return;
-
- device->ws->destroy_sem(sem);
}
VkResult radv_CreateEvent(
@@ -2570,17 +1298,6 @@ VkResult radv_CreateBuffer(
buffer->usage = pCreateInfo->usage;
buffer->bo = NULL;
buffer->offset = 0;
- buffer->flags = pCreateInfo->flags;
-
- if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
- buffer->bo = device->ws->buffer_create(device->ws,
- align64(buffer->size, 4096),
- 4096, 0, RADEON_FLAG_VIRTUAL);
- if (!buffer->bo) {
- vk_free2(&device->alloc, pAllocator, buffer);
- return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
- }
*pBuffer = radv_buffer_to_handle(buffer);
@@ -2598,9 +1315,6 @@ void radv_DestroyBuffer(
if (!buffer)
return;
- if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
- device->ws->buffer_destroy(buffer->bo);
-
vk_free2(&device->alloc, pAllocator, buffer);
}
@@ -2613,11 +1327,6 @@ si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
return image->surface.tiling_index[level];
}
-static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
-{
- return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
-}
-
static void
radv_initialise_color_surface(struct radv_device *device,
struct radv_color_buffer_info *cb,
@@ -2649,9 +1358,8 @@ radv_initialise_color_surface(struct radv_device *device,
va += iview->image->dcc_offset;
cb->cb_dcc_base = va >> 8;
- uint32_t max_slice = radv_surface_layer_count(iview);
cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
- S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
+ S_028C6C_SLICE_MAX(iview->base_layer + iview->extent.depth - 1);
cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
pitch_tile_max = level_info->nblk_x / 8 - 1;
@@ -2674,14 +1382,14 @@ radv_initialise_color_surface(struct radv_device *device,
if (iview->image->fmask.size) {
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
- if (device->physical_device->rad_info.chip_class >= CIK)
+ if (device->instance->physicalDevice.rad_info.chip_class >= CIK)
cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
cb->cb_color_fmask = va >> 8;
cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
} else {
/* This must be set for fast clear to work without FMASK. */
- if (device->physical_device->rad_info.chip_class >= CIK)
+ if (device->instance->physicalDevice.rad_info.chip_class >= CIK)
cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
cb->cb_color_fmask = cb->cb_color_base;
@@ -2734,14 +1442,13 @@ radv_initialise_color_surface(struct radv_device *device,
if (iview->image->fmask.size)
cb->cb_color_info |= S_028C70_COMPRESSION(1);
- if (iview->image->cmask.size &&
- !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
+ if (iview->image->cmask.size && device->allow_fast_clears)
cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
if (iview->image->surface.dcc_size && level_info->dcc_enabled)
cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
- if (device->physical_device->rad_info.chip_class >= VI) {
+ if (device->instance->physicalDevice.rad_info.chip_class >= VI) {
unsigned max_uncompressed_block_size = 2;
if (iview->image->samples > 1) {
if (iview->image->surface.bpe == 1)
@@ -2756,7 +1463,7 @@ radv_initialise_color_surface(struct radv_device *device,
/* This must be set for fast clear to work without FMASK. */
if (!iview->image->fmask.size &&
- device->physical_device->rad_info.chip_class == SI) {
+ device->instance->physicalDevice.rad_info.chip_class == SI) {
unsigned bankh = util_logbase2(iview->image->surface.bankh);
cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
}
@@ -2771,7 +1478,6 @@ radv_initialise_ds_surface(struct radv_device *device,
unsigned format;
uint64_t va, s_offs, z_offs;
const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
- bool stencil_only = false;
memset(ds, 0, sizeof(*ds));
switch (iview->vk_format) {
case VK_FORMAT_D24_UNORM_S8_UINT:
@@ -2790,24 +1496,22 @@ radv_initialise_ds_surface(struct radv_device *device,
S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
ds->offset_scale = 1.0f;
break;
- case VK_FORMAT_S8_UINT:
- stencil_only = true;
- level_info = &iview->image->surface.stencil_level[level];
- break;
default:
break;
}
format = radv_translate_dbformat(iview->vk_format);
+ if (format == V_028040_Z_INVALID) {
+ fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
+ }
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
s_offs = z_offs = va;
z_offs += iview->image->surface.level[level].offset;
s_offs += iview->image->surface.stencil_level[level].offset;
- uint32_t max_slice = radv_surface_layer_count(iview);
ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
- S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
+ S_028008_SLICE_MAX(iview->base_layer + iview->extent.depth - 1);
ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
@@ -2819,8 +1523,8 @@ radv_initialise_ds_surface(struct radv_device *device,
else
ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
- if (device->physical_device->rad_info.chip_class >= CIK) {
- struct radeon_info *info = &device->physical_device->rad_info;
+ if (device->instance->physicalDevice.rad_info.chip_class >= CIK) {
+ struct radeon_info *info = &device->instance->physicalDevice.rad_info;
unsigned tiling_index = iview->image->surface.tiling_index[level];
unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
unsigned macro_index = iview->image->surface.macro_tile_index;
@@ -2828,9 +1532,6 @@ radv_initialise_ds_surface(struct radv_device *device,
unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
- if (stencil_only)
- tile_mode = stencil_tile_mode;
-
ds->db_depth_info |=
S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
@@ -2845,11 +1546,9 @@ radv_initialise_ds_surface(struct radv_device *device,
ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
tile_mode_index = si_tile_mode_index(iview->image, level, true);
ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
- if (stencil_only)
- ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
}
- if (iview->image->surface.htile_size && !level) {
+ if (iview->image->htile.size && !level) {
ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
S_028040_ALLOW_EXPCLEAR(1);
@@ -2872,7 +1571,7 @@ radv_initialise_ds_surface(struct radv_device *device,
ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
- iview->image->htile_offset;
+ iview->image->htile.offset;
ds->db_htile_data_base = va >> 8;
ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
} else {
@@ -2907,9 +1606,6 @@ VkResult radv_CreateFramebuffer(
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
framebuffer->attachment_count = pCreateInfo->attachmentCount;
- framebuffer->width = pCreateInfo->width;
- framebuffer->height = pCreateInfo->height;
- framebuffer->layers = pCreateInfo->layers;
for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
VkImageView _iview = pCreateInfo->pAttachments[i];
struct radv_image_view *iview = radv_image_view_from_handle(_iview);
@@ -2919,11 +1615,12 @@ VkResult radv_CreateFramebuffer(
} else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
}
- framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
- framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
- framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
}
+ framebuffer->width = pCreateInfo->width;
+ framebuffer->height = pCreateInfo->height;
+ framebuffer->layers = pCreateInfo->layers;
+
*pFramebuffer = radv_framebuffer_to_handle(framebuffer);
return VK_SUCCESS;
}
@@ -3057,7 +1754,14 @@ radv_init_sampler(struct radv_device *device,
uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
(uint32_t) pCreateInfo->maxAnisotropy : 0;
uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
- bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
+ bool is_vi;
+ is_vi = (device->instance->physicalDevice.rad_info.chip_class >= VI);
+
+ if (!is_vi && max_aniso > 0) {
+ radv_finishme("Anisotropic filtering must be disabled manually "
+ "by the shader on SI-CI when BASE_LEVEL == LAST_LEVEL\n");
+ max_aniso = max_aniso_ratio = 0;
+ }
sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
@@ -3076,7 +1780,7 @@ radv_init_sampler(struct radv_device *device,
S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
- S_008F38_MIP_POINT_PRECLAMP(0) |
+ S_008F38_MIP_POINT_PRECLAMP(1) |
S_008F38_DISABLE_LSB_CEIL(1) |
S_008F38_FILTER_PREC_FIX(1) |
S_008F38_ANISO_OVERRIDE(is_vi));
@@ -3118,48 +1822,3 @@ void radv_DestroySampler(
return;
vk_free2(&device->alloc, pAllocator, sampler);
}
-
-
-/* vk_icd.h does not declare this function, so we declare it here to
- * suppress Wmissing-prototypes.
- */
-PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
-vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
-
-PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
-vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
-{
- /* For the full details on loader interface versioning, see
- * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
- * What follows is a condensed summary, to help you navigate the large and
- * confusing official doc.
- *
- * - Loader interface v0 is incompatible with later versions. We don't
- * support it.
- *
- * - In loader interface v1:
- * - The first ICD entrypoint called by the loader is
- * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
- * entrypoint.
- * - The ICD must statically expose no other Vulkan symbol unless it is
- * linked with -Bsymbolic.
- * - Each dispatchable Vulkan handle created by the ICD must be
- * a pointer to a struct whose first member is VK_LOADER_DATA. The
- * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
- * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
- * vkDestroySurfaceKHR(). The ICD must be capable of working with
- * such loader-managed surfaces.
- *
- * - Loader interface v2 differs from v1 in:
- * - The first ICD entrypoint called by the loader is
- * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
- * statically expose this entrypoint.
- *
- * - Loader interface v3 differs from v2 in:
- * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
- * vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
- * because the loader no longer does so.
- */
- *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
- return VK_SUCCESS;
-}
diff --git a/lib/mesa/src/amd/vulkan/radv_entrypoints.c b/lib/mesa/src/amd/vulkan/radv_entrypoints.c
index 3f86ea1df..4ee949234 100644
--- a/lib/mesa/src/amd/vulkan/radv_entrypoints.c
+++ b/lib/mesa/src/amd/vulkan/radv_entrypoints.c
@@ -39,20 +39,20 @@ static const char strings[] =
"vkCreateInstance\0"
"vkDestroyInstance\0"
"vkEnumeratePhysicalDevices\0"
- "vkGetDeviceProcAddr\0"
- "vkGetInstanceProcAddr\0"
- "vkGetPhysicalDeviceProperties\0"
- "vkGetPhysicalDeviceQueueFamilyProperties\0"
- "vkGetPhysicalDeviceMemoryProperties\0"
"vkGetPhysicalDeviceFeatures\0"
"vkGetPhysicalDeviceFormatProperties\0"
"vkGetPhysicalDeviceImageFormatProperties\0"
+ "vkGetPhysicalDeviceProperties\0"
+ "vkGetPhysicalDeviceQueueFamilyProperties\0"
+ "vkGetPhysicalDeviceMemoryProperties\0"
+ "vkGetInstanceProcAddr\0"
+ "vkGetDeviceProcAddr\0"
"vkCreateDevice\0"
"vkDestroyDevice\0"
- "vkEnumerateInstanceLayerProperties\0"
"vkEnumerateInstanceExtensionProperties\0"
- "vkEnumerateDeviceLayerProperties\0"
"vkEnumerateDeviceExtensionProperties\0"
+ "vkEnumerateInstanceLayerProperties\0"
+ "vkEnumerateDeviceLayerProperties\0"
"vkGetDeviceQueue\0"
"vkQueueSubmit\0"
"vkQueueWaitIdle\0"
@@ -64,10 +64,10 @@ static const char strings[] =
"vkFlushMappedMemoryRanges\0"
"vkInvalidateMappedMemoryRanges\0"
"vkGetDeviceMemoryCommitment\0"
- "vkGetBufferMemoryRequirements\0"
"vkBindBufferMemory\0"
- "vkGetImageMemoryRequirements\0"
"vkBindImageMemory\0"
+ "vkGetBufferMemoryRequirements\0"
+ "vkGetImageMemoryRequirements\0"
"vkGetImageSparseMemoryRequirements\0"
"vkGetPhysicalDeviceSparseImageFormatProperties\0"
"vkQueueBindSparse\0"
@@ -183,47 +183,48 @@ static const char strings[] =
"vkGetSwapchainImagesKHR\0"
"vkAcquireNextImageKHR\0"
"vkQueuePresentKHR\0"
- "vkCreateWaylandSurfaceKHR\0"
- "vkGetPhysicalDeviceWaylandPresentationSupportKHR\0"
+ "vkGetPhysicalDeviceDisplayPropertiesKHR\0"
+ "vkGetPhysicalDeviceDisplayPlanePropertiesKHR\0"
+ "vkGetDisplayPlaneSupportedDisplaysKHR\0"
+ "vkGetDisplayModePropertiesKHR\0"
+ "vkCreateDisplayModeKHR\0"
+ "vkGetDisplayPlaneCapabilitiesKHR\0"
+ "vkCreateDisplayPlaneSurfaceKHR\0"
+ "vkCreateSharedSwapchainsKHR\0"
"vkCreateXlibSurfaceKHR\0"
"vkGetPhysicalDeviceXlibPresentationSupportKHR\0"
"vkCreateXcbSurfaceKHR\0"
"vkGetPhysicalDeviceXcbPresentationSupportKHR\0"
- "vkCmdDrawIndirectCountAMD\0"
- "vkCmdDrawIndexedIndirectCountAMD\0"
- "vkGetPhysicalDeviceFeatures2KHR\0"
- "vkGetPhysicalDeviceProperties2KHR\0"
- "vkGetPhysicalDeviceFormatProperties2KHR\0"
- "vkGetPhysicalDeviceImageFormatProperties2KHR\0"
- "vkGetPhysicalDeviceQueueFamilyProperties2KHR\0"
- "vkGetPhysicalDeviceMemoryProperties2KHR\0"
- "vkGetPhysicalDeviceSparseImageFormatProperties2KHR\0"
- "vkCmdPushDescriptorSetKHR\0"
- "vkTrimCommandPoolKHR\0"
- "vkCreateDescriptorUpdateTemplateKHR\0"
- "vkDestroyDescriptorUpdateTemplateKHR\0"
- "vkUpdateDescriptorSetWithTemplateKHR\0"
- "vkCmdPushDescriptorSetWithTemplateKHR\0"
+ "vkCreateWaylandSurfaceKHR\0"
+ "vkGetPhysicalDeviceWaylandPresentationSupportKHR\0"
+ "vkCreateMirSurfaceKHR\0"
+ "vkGetPhysicalDeviceMirPresentationSupportKHR\0"
+ "vkCreateAndroidSurfaceKHR\0"
+ "vkCreateWin32SurfaceKHR\0"
+ "vkGetPhysicalDeviceWin32PresentationSupportKHR\0"
+ "vkCreateDebugReportCallbackEXT\0"
+ "vkDestroyDebugReportCallbackEXT\0"
+ "vkDebugReportMessageEXT\0"
;
static const struct radv_entrypoint entrypoints[] = {
{ 0, 0x38a581a6 },
{ 17, 0x9bd21af2 },
{ 35, 0x5787c327 },
- { 62, 0xba013486 },
- { 82, 0x3d2ae9ad },
- { 104, 0x52fe22c9 },
- { 134, 0x4e5fc88a },
- { 175, 0xa90da4da },
- { 211, 0x113e2f33 },
- { 239, 0x3e54b398 },
- { 275, 0xdd36a867 },
+ { 62, 0x113e2f33 },
+ { 90, 0x3e54b398 },
+ { 126, 0xdd36a867 },
+ { 167, 0x52fe22c9 },
+ { 197, 0x4e5fc88a },
+ { 238, 0xa90da4da },
+ { 274, 0x3d2ae9ad },
+ { 296, 0xba013486 },
{ 316, 0x085ed23f },
{ 331, 0x1fbcc9cb },
- { 347, 0x081f69d8 },
- { 382, 0xeb27627e },
- { 421, 0x2f8566e7 },
- { 454, 0x5fd13eed },
+ { 347, 0xeb27627e },
+ { 386, 0x5fd13eed },
+ { 423, 0x081f69d8 },
+ { 458, 0x2f8566e7 },
{ 491, 0xcc920d9a },
{ 508, 0xfa4713ec },
{ 522, 0x6f8fc2a5 },
@@ -235,10 +236,10 @@ static const struct radv_entrypoint entrypoints[] = {
{ 611, 0xff52f051 },
{ 637, 0x1e115cca },
{ 668, 0x46e38db5 },
- { 696, 0xab98422a },
- { 726, 0x06bcbdcb },
- { 745, 0x916f1e63 },
- { 774, 0x5caaae4a },
+ { 696, 0x06bcbdcb },
+ { 715, 0x5caaae4a },
+ { 733, 0xab98422a },
+ { 763, 0x916f1e63 },
{ 792, 0x15855f5b },
{ 827, 0x272ef8ef },
{ 874, 0xc3628a09 },
@@ -354,27 +355,28 @@ static const struct radv_entrypoint entrypoints[] = {
{ 3126, 0x57695f28 },
{ 3150, 0xc3fedb2e },
{ 3172, 0xfc5fb6ce },
- { 3190, 0x2b2a4b79 },
- { 3216, 0x84e085ac },
- { 3265, 0xa693bc66 },
- { 3288, 0x34a063ab },
- { 3334, 0xc5e5b106 },
- { 3356, 0x41782cb9 },
- { 3401, 0xe5ad0a50 },
- { 3427, 0xc86e9287 },
- { 3460, 0x6a9a3636 },
- { 3492, 0xcd15838c },
- { 3526, 0x9099cbbb },
- { 3566, 0x102ff7ea },
- { 3611, 0x5ceb2bed },
- { 3656, 0xc8c3da3d },
- { 3696, 0x8746ed72 },
- { 3747, 0xf17232a1 },
- { 3773, 0x51177c8d },
- { 3794, 0x5189488a },
- { 3830, 0xaa83901e },
- { 3867, 0x214ad230 },
- { 3904, 0x3d528981 },
+ { 3190, 0x0fa0cd2e },
+ { 3230, 0xb9b8ddba },
+ { 3275, 0xabef4889 },
+ { 3313, 0x36b8a8de },
+ { 3343, 0xcc0bde41 },
+ { 3366, 0x4b60d48c },
+ { 3399, 0x7ac4dacb },
+ { 3430, 0x47655c4a },
+ { 3458, 0xa693bc66 },
+ { 3481, 0x34a063ab },
+ { 3527, 0xc5e5b106 },
+ { 3549, 0x41782cb9 },
+ { 3594, 0x2b2a4b79 },
+ { 3620, 0x84e085ac },
+ { 3669, 0x2ce93a55 },
+ { 3691, 0xcf1e6028 },
+ { 3736, 0x03667f4e },
+ { 3762, 0xfa2ba1e2 },
+ { 3786, 0x80e72505 },
+ { 3833, 0x0987ef56 },
+ { 3864, 0x43d4c4e2 },
+ { 3896, 0xa4e75334 },
};
@@ -387,20 +389,20 @@ static const struct radv_entrypoint entrypoints[] = {
VkResult radv_CreateInstance(const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkInstance* pInstance) __attribute__ ((weak));
void radv_DestroyInstance(VkInstance instance, const VkAllocationCallbacks* pAllocator) __attribute__ ((weak));
VkResult radv_EnumeratePhysicalDevices(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices) __attribute__ ((weak));
-PFN_vkVoidFunction radv_GetDeviceProcAddr(VkDevice device, const char* pName) __attribute__ ((weak));
-PFN_vkVoidFunction radv_GetInstanceProcAddr(VkInstance instance, const char* pName) __attribute__ ((weak));
-void radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties) __attribute__ ((weak));
-void radv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties* pQueueFamilyProperties) __attribute__ ((weak));
-void radv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties) __attribute__ ((weak));
void radv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures) __attribute__ ((weak));
void radv_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties) __attribute__ ((weak));
VkResult radv_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties) __attribute__ ((weak));
+void radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties) __attribute__ ((weak));
+void radv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties* pQueueFamilyProperties) __attribute__ ((weak));
+void radv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties) __attribute__ ((weak));
+PFN_vkVoidFunction radv_GetInstanceProcAddr(VkInstance instance, const char* pName) __attribute__ ((weak));
+PFN_vkVoidFunction radv_GetDeviceProcAddr(VkDevice device, const char* pName) __attribute__ ((weak));
VkResult radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) __attribute__ ((weak));
void radv_DestroyDevice(VkDevice device, const VkAllocationCallbacks* pAllocator) __attribute__ ((weak));
-VkResult radv_EnumerateInstanceLayerProperties(uint32_t* pPropertyCount, VkLayerProperties* pProperties) __attribute__ ((weak));
VkResult radv_EnumerateInstanceExtensionProperties(const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties) __attribute__ ((weak));
-VkResult radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkLayerProperties* pProperties) __attribute__ ((weak));
VkResult radv_EnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties) __attribute__ ((weak));
+VkResult radv_EnumerateInstanceLayerProperties(uint32_t* pPropertyCount, VkLayerProperties* pProperties) __attribute__ ((weak));
+VkResult radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkLayerProperties* pProperties) __attribute__ ((weak));
void radv_GetDeviceQueue(VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue* pQueue) __attribute__ ((weak));
VkResult radv_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) __attribute__ ((weak));
VkResult radv_QueueWaitIdle(VkQueue queue) __attribute__ ((weak));
@@ -412,10 +414,10 @@ void radv_UnmapMemory(VkDevice device, VkDeviceMemory memory) __attribute__ ((we
VkResult radv_FlushMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges) __attribute__ ((weak));
VkResult radv_InvalidateMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges) __attribute__ ((weak));
void radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory, VkDeviceSize* pCommittedMemoryInBytes) __attribute__ ((weak));
-void radv_GetBufferMemoryRequirements(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements) __attribute__ ((weak));
VkResult radv_BindBufferMemory(VkDevice device, VkBuffer buffer, VkDeviceMemory memory, VkDeviceSize memoryOffset) __attribute__ ((weak));
-void radv_GetImageMemoryRequirements(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements) __attribute__ ((weak));
VkResult radv_BindImageMemory(VkDevice device, VkImage image, VkDeviceMemory memory, VkDeviceSize memoryOffset) __attribute__ ((weak));
+void radv_GetBufferMemoryRequirements(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements) __attribute__ ((weak));
+void radv_GetImageMemoryRequirements(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements) __attribute__ ((weak));
void radv_GetImageSparseMemoryRequirements(VkDevice device, VkImage image, uint32_t* pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements* pSparseMemoryRequirements) __attribute__ ((weak));
void radv_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t* pPropertyCount, VkSparseImageFormatProperties* pProperties) __attribute__ ((weak));
VkResult radv_QueueBindSparse(VkQueue queue, uint32_t bindInfoCount, const VkBindSparseInfo* pBindInfo, VkFence fence) __attribute__ ((weak));
@@ -494,14 +496,14 @@ void radv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t
void radv_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) __attribute__ ((weak));
void radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride) __attribute__ ((weak));
void radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride) __attribute__ ((weak));
-void radv_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) __attribute__ ((weak));
+void radv_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z) __attribute__ ((weak));
void radv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset) __attribute__ ((weak));
void radv_CmdCopyBuffer(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferCopy* pRegions) __attribute__ ((weak));
void radv_CmdCopyImage(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageCopy* pRegions) __attribute__ ((weak));
void radv_CmdBlitImage(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkFilter filter) __attribute__ ((weak));
void radv_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions) __attribute__ ((weak));
void radv_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions) __attribute__ ((weak));
-void radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const void* pData) __attribute__ ((weak));
+void radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const uint32_t* pData) __attribute__ ((weak));
void radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize size, uint32_t data) __attribute__ ((weak));
void radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges) __attribute__ ((weak));
void radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges) __attribute__ ((weak));
@@ -531,12 +533,14 @@ void radv_DestroySwapchainKHR(VkDevice device, VkSwapchainKHR swapchain, const V
VkResult radv_GetSwapchainImagesKHR(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pSwapchainImageCount, VkImage* pSwapchainImages) __attribute__ ((weak));
VkResult radv_AcquireNextImageKHR(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, VkFence fence, uint32_t* pImageIndex) __attribute__ ((weak));
VkResult radv_QueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* pPresentInfo) __attribute__ ((weak));
-#ifdef VK_USE_PLATFORM_WAYLAND_KHR
-VkResult radv_CreateWaylandSurfaceKHR(VkInstance instance, const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) __attribute__ ((weak));
-#endif // VK_USE_PLATFORM_WAYLAND_KHR
-#ifdef VK_USE_PLATFORM_WAYLAND_KHR
-VkBool32 radv_GetPhysicalDeviceWaylandPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, struct wl_display* display) __attribute__ ((weak));
-#endif // VK_USE_PLATFORM_WAYLAND_KHR
+VkResult radv_GetPhysicalDeviceDisplayPropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPropertiesKHR* pProperties) __attribute__ ((weak));
+VkResult radv_GetPhysicalDeviceDisplayPlanePropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPlanePropertiesKHR* pProperties) __attribute__ ((weak));
+VkResult radv_GetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physicalDevice, uint32_t planeIndex, uint32_t* pDisplayCount, VkDisplayKHR* pDisplays) __attribute__ ((weak));
+VkResult radv_GetDisplayModePropertiesKHR(VkPhysicalDevice physicalDevice, VkDisplayKHR display, uint32_t* pPropertyCount, VkDisplayModePropertiesKHR* pProperties) __attribute__ ((weak));
+VkResult radv_CreateDisplayModeKHR(VkPhysicalDevice physicalDevice, VkDisplayKHR display, const VkDisplayModeCreateInfoKHR*pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDisplayModeKHR* pMode) __attribute__ ((weak));
+VkResult radv_GetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physicalDevice, VkDisplayModeKHR mode, uint32_t planeIndex, VkDisplayPlaneCapabilitiesKHR* pCapabilities) __attribute__ ((weak));
+VkResult radv_CreateDisplayPlaneSurfaceKHR(VkInstance instance, const VkDisplaySurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) __attribute__ ((weak));
+VkResult radv_CreateSharedSwapchainsKHR(VkDevice device, uint32_t swapchainCount, const VkSwapchainCreateInfoKHR* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchains) __attribute__ ((weak));
#ifdef VK_USE_PLATFORM_XLIB_KHR
VkResult radv_CreateXlibSurfaceKHR(VkInstance instance, const VkXlibSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) __attribute__ ((weak));
#endif // VK_USE_PLATFORM_XLIB_KHR
@@ -549,40 +553,49 @@ VkResult radv_CreateXcbSurfaceKHR(VkInstance instance, const VkXcbSurfaceCreateI
#ifdef VK_USE_PLATFORM_XCB_KHR
VkBool32 radv_GetPhysicalDeviceXcbPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, xcb_connection_t* connection, xcb_visualid_t visual_id) __attribute__ ((weak));
#endif // VK_USE_PLATFORM_XCB_KHR
-void radv_CmdDrawIndirectCountAMD(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride) __attribute__ ((weak));
-void radv_CmdDrawIndexedIndirectCountAMD(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride) __attribute__ ((weak));
-void radv_GetPhysicalDeviceFeatures2KHR(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures2KHR* pFeatures) __attribute__ ((weak));
-void radv_GetPhysicalDeviceProperties2KHR(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties2KHR* pProperties) __attribute__ ((weak));
-void radv_GetPhysicalDeviceFormatProperties2KHR(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties2KHR* pFormatProperties) __attribute__ ((weak));
-VkResult radv_GetPhysicalDeviceImageFormatProperties2KHR(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceImageFormatInfo2KHR* pImageFormatInfo, VkImageFormatProperties2KHR* pImageFormatProperties) __attribute__ ((weak));
-void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties2KHR* pQueueFamilyProperties) __attribute__ ((weak));
-void radv_GetPhysicalDeviceMemoryProperties2KHR(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties2KHR* pMemoryProperties) __attribute__ ((weak));
-void radv_GetPhysicalDeviceSparseImageFormatProperties2KHR(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSparseImageFormatInfo2KHR* pFormatInfo, uint32_t* pPropertyCount, VkSparseImageFormatProperties2KHR* pProperties) __attribute__ ((weak));
-void radv_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t set, uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites) __attribute__ ((weak));
-void radv_TrimCommandPoolKHR(VkDevice device, VkCommandPool commandPool, VkCommandPoolTrimFlagsKHR flags) __attribute__ ((weak));
-VkResult radv_CreateDescriptorUpdateTemplateKHR(VkDevice device, const VkDescriptorUpdateTemplateCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDescriptorUpdateTemplateKHR* pDescriptorUpdateTemplate) __attribute__ ((weak));
-void radv_DestroyDescriptorUpdateTemplateKHR(VkDevice device, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, const VkAllocationCallbacks* pAllocator) __attribute__ ((weak));
-void radv_UpdateDescriptorSetWithTemplateKHR(VkDevice device, VkDescriptorSet descriptorSet, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, const void* pData) __attribute__ ((weak));
-void radv_CmdPushDescriptorSetWithTemplateKHR(VkCommandBuffer commandBuffer, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, VkPipelineLayout layout, uint32_t set, const void* pData) __attribute__ ((weak));
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+VkResult radv_CreateWaylandSurfaceKHR(VkInstance instance, const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) __attribute__ ((weak));
+#endif // VK_USE_PLATFORM_WAYLAND_KHR
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+VkBool32 radv_GetPhysicalDeviceWaylandPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, struct wl_display* display) __attribute__ ((weak));
+#endif // VK_USE_PLATFORM_WAYLAND_KHR
+#ifdef VK_USE_PLATFORM_MIR_KHR
+VkResult radv_CreateMirSurfaceKHR(VkInstance instance, const VkMirSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) __attribute__ ((weak));
+#endif // VK_USE_PLATFORM_MIR_KHR
+#ifdef VK_USE_PLATFORM_MIR_KHR
+VkBool32 radv_GetPhysicalDeviceMirPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, MirConnection* connection) __attribute__ ((weak));
+#endif // VK_USE_PLATFORM_MIR_KHR
+#ifdef VK_USE_PLATFORM_ANDROID_KHR
+VkResult radv_CreateAndroidSurfaceKHR(VkInstance instance, const VkAndroidSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) __attribute__ ((weak));
+#endif // VK_USE_PLATFORM_ANDROID_KHR
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+VkResult radv_CreateWin32SurfaceKHR(VkInstance instance, const VkWin32SurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) __attribute__ ((weak));
+#endif // VK_USE_PLATFORM_WIN32_KHR
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+VkBool32 radv_GetPhysicalDeviceWin32PresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex) __attribute__ ((weak));
+#endif // VK_USE_PLATFORM_WIN32_KHR
+VkResult radv_CreateDebugReportCallbackEXT(VkInstance instance, const VkDebugReportCallbackCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugReportCallbackEXT* pCallback) __attribute__ ((weak));
+void radv_DestroyDebugReportCallbackEXT(VkInstance instance, VkDebugReportCallbackEXT callback, const VkAllocationCallbacks* pAllocator) __attribute__ ((weak));
+void radv_DebugReportMessageEXT(VkInstance instance, VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType, uint64_t object, size_t location, int32_t messageCode, const char* pLayerPrefix, const char* pMessage) __attribute__ ((weak));
const struct radv_dispatch_table radv_layer = {
.CreateInstance = radv_CreateInstance,
.DestroyInstance = radv_DestroyInstance,
.EnumeratePhysicalDevices = radv_EnumeratePhysicalDevices,
- .GetDeviceProcAddr = radv_GetDeviceProcAddr,
- .GetInstanceProcAddr = radv_GetInstanceProcAddr,
- .GetPhysicalDeviceProperties = radv_GetPhysicalDeviceProperties,
- .GetPhysicalDeviceQueueFamilyProperties = radv_GetPhysicalDeviceQueueFamilyProperties,
- .GetPhysicalDeviceMemoryProperties = radv_GetPhysicalDeviceMemoryProperties,
.GetPhysicalDeviceFeatures = radv_GetPhysicalDeviceFeatures,
.GetPhysicalDeviceFormatProperties = radv_GetPhysicalDeviceFormatProperties,
.GetPhysicalDeviceImageFormatProperties = radv_GetPhysicalDeviceImageFormatProperties,
+ .GetPhysicalDeviceProperties = radv_GetPhysicalDeviceProperties,
+ .GetPhysicalDeviceQueueFamilyProperties = radv_GetPhysicalDeviceQueueFamilyProperties,
+ .GetPhysicalDeviceMemoryProperties = radv_GetPhysicalDeviceMemoryProperties,
+ .GetInstanceProcAddr = radv_GetInstanceProcAddr,
+ .GetDeviceProcAddr = radv_GetDeviceProcAddr,
.CreateDevice = radv_CreateDevice,
.DestroyDevice = radv_DestroyDevice,
- .EnumerateInstanceLayerProperties = radv_EnumerateInstanceLayerProperties,
.EnumerateInstanceExtensionProperties = radv_EnumerateInstanceExtensionProperties,
- .EnumerateDeviceLayerProperties = radv_EnumerateDeviceLayerProperties,
.EnumerateDeviceExtensionProperties = radv_EnumerateDeviceExtensionProperties,
+ .EnumerateInstanceLayerProperties = radv_EnumerateInstanceLayerProperties,
+ .EnumerateDeviceLayerProperties = radv_EnumerateDeviceLayerProperties,
.GetDeviceQueue = radv_GetDeviceQueue,
.QueueSubmit = radv_QueueSubmit,
.QueueWaitIdle = radv_QueueWaitIdle,
@@ -594,10 +607,10 @@ const struct radv_dispatch_table radv_layer = {
.FlushMappedMemoryRanges = radv_FlushMappedMemoryRanges,
.InvalidateMappedMemoryRanges = radv_InvalidateMappedMemoryRanges,
.GetDeviceMemoryCommitment = radv_GetDeviceMemoryCommitment,
- .GetBufferMemoryRequirements = radv_GetBufferMemoryRequirements,
.BindBufferMemory = radv_BindBufferMemory,
- .GetImageMemoryRequirements = radv_GetImageMemoryRequirements,
.BindImageMemory = radv_BindImageMemory,
+ .GetBufferMemoryRequirements = radv_GetBufferMemoryRequirements,
+ .GetImageMemoryRequirements = radv_GetImageMemoryRequirements,
.GetImageSparseMemoryRequirements = radv_GetImageSparseMemoryRequirements,
.GetPhysicalDeviceSparseImageFormatProperties = radv_GetPhysicalDeviceSparseImageFormatProperties,
.QueueBindSparse = radv_QueueBindSparse,
@@ -713,12 +726,14 @@ const struct radv_dispatch_table radv_layer = {
.GetSwapchainImagesKHR = radv_GetSwapchainImagesKHR,
.AcquireNextImageKHR = radv_AcquireNextImageKHR,
.QueuePresentKHR = radv_QueuePresentKHR,
-#ifdef VK_USE_PLATFORM_WAYLAND_KHR
- .CreateWaylandSurfaceKHR = radv_CreateWaylandSurfaceKHR,
-#endif // VK_USE_PLATFORM_WAYLAND_KHR
-#ifdef VK_USE_PLATFORM_WAYLAND_KHR
- .GetPhysicalDeviceWaylandPresentationSupportKHR = radv_GetPhysicalDeviceWaylandPresentationSupportKHR,
-#endif // VK_USE_PLATFORM_WAYLAND_KHR
+ .GetPhysicalDeviceDisplayPropertiesKHR = radv_GetPhysicalDeviceDisplayPropertiesKHR,
+ .GetPhysicalDeviceDisplayPlanePropertiesKHR = radv_GetPhysicalDeviceDisplayPlanePropertiesKHR,
+ .GetDisplayPlaneSupportedDisplaysKHR = radv_GetDisplayPlaneSupportedDisplaysKHR,
+ .GetDisplayModePropertiesKHR = radv_GetDisplayModePropertiesKHR,
+ .CreateDisplayModeKHR = radv_CreateDisplayModeKHR,
+ .GetDisplayPlaneCapabilitiesKHR = radv_GetDisplayPlaneCapabilitiesKHR,
+ .CreateDisplayPlaneSurfaceKHR = radv_CreateDisplayPlaneSurfaceKHR,
+ .CreateSharedSwapchainsKHR = radv_CreateSharedSwapchainsKHR,
#ifdef VK_USE_PLATFORM_XLIB_KHR
.CreateXlibSurfaceKHR = radv_CreateXlibSurfaceKHR,
#endif // VK_USE_PLATFORM_XLIB_KHR
@@ -731,26 +746,35 @@ const struct radv_dispatch_table radv_layer = {
#ifdef VK_USE_PLATFORM_XCB_KHR
.GetPhysicalDeviceXcbPresentationSupportKHR = radv_GetPhysicalDeviceXcbPresentationSupportKHR,
#endif // VK_USE_PLATFORM_XCB_KHR
- .CmdDrawIndirectCountAMD = radv_CmdDrawIndirectCountAMD,
- .CmdDrawIndexedIndirectCountAMD = radv_CmdDrawIndexedIndirectCountAMD,
- .GetPhysicalDeviceFeatures2KHR = radv_GetPhysicalDeviceFeatures2KHR,
- .GetPhysicalDeviceProperties2KHR = radv_GetPhysicalDeviceProperties2KHR,
- .GetPhysicalDeviceFormatProperties2KHR = radv_GetPhysicalDeviceFormatProperties2KHR,
- .GetPhysicalDeviceImageFormatProperties2KHR = radv_GetPhysicalDeviceImageFormatProperties2KHR,
- .GetPhysicalDeviceQueueFamilyProperties2KHR = radv_GetPhysicalDeviceQueueFamilyProperties2KHR,
- .GetPhysicalDeviceMemoryProperties2KHR = radv_GetPhysicalDeviceMemoryProperties2KHR,
- .GetPhysicalDeviceSparseImageFormatProperties2KHR = radv_GetPhysicalDeviceSparseImageFormatProperties2KHR,
- .CmdPushDescriptorSetKHR = radv_CmdPushDescriptorSetKHR,
- .TrimCommandPoolKHR = radv_TrimCommandPoolKHR,
- .CreateDescriptorUpdateTemplateKHR = radv_CreateDescriptorUpdateTemplateKHR,
- .DestroyDescriptorUpdateTemplateKHR = radv_DestroyDescriptorUpdateTemplateKHR,
- .UpdateDescriptorSetWithTemplateKHR = radv_UpdateDescriptorSetWithTemplateKHR,
- .CmdPushDescriptorSetWithTemplateKHR = radv_CmdPushDescriptorSetWithTemplateKHR,
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+ .CreateWaylandSurfaceKHR = radv_CreateWaylandSurfaceKHR,
+#endif // VK_USE_PLATFORM_WAYLAND_KHR
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+ .GetPhysicalDeviceWaylandPresentationSupportKHR = radv_GetPhysicalDeviceWaylandPresentationSupportKHR,
+#endif // VK_USE_PLATFORM_WAYLAND_KHR
+#ifdef VK_USE_PLATFORM_MIR_KHR
+ .CreateMirSurfaceKHR = radv_CreateMirSurfaceKHR,
+#endif // VK_USE_PLATFORM_MIR_KHR
+#ifdef VK_USE_PLATFORM_MIR_KHR
+ .GetPhysicalDeviceMirPresentationSupportKHR = radv_GetPhysicalDeviceMirPresentationSupportKHR,
+#endif // VK_USE_PLATFORM_MIR_KHR
+#ifdef VK_USE_PLATFORM_ANDROID_KHR
+ .CreateAndroidSurfaceKHR = radv_CreateAndroidSurfaceKHR,
+#endif // VK_USE_PLATFORM_ANDROID_KHR
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+ .CreateWin32SurfaceKHR = radv_CreateWin32SurfaceKHR,
+#endif // VK_USE_PLATFORM_WIN32_KHR
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+ .GetPhysicalDeviceWin32PresentationSupportKHR = radv_GetPhysicalDeviceWin32PresentationSupportKHR,
+#endif // VK_USE_PLATFORM_WIN32_KHR
+ .CreateDebugReportCallbackEXT = radv_CreateDebugReportCallbackEXT,
+ .DestroyDebugReportCallbackEXT = radv_DestroyDebugReportCallbackEXT,
+ .DebugReportMessageEXT = radv_DebugReportMessageEXT,
};
-static void * __attribute__ ((noinline))
+void * __attribute__ ((noinline))
radv_resolve_entrypoint(uint32_t index)
{
return radv_layer.entrypoints[index];
@@ -759,51 +783,51 @@ radv_resolve_entrypoint(uint32_t index)
/* Hash table stats:
* size 256 entries
* collisions entries
- * 0 115
- * 1 29
+ * 0 111
+ * 1 30
* 2 10
- * 3 8
- * 4 4
+ * 3 7
+ * 4 5
* 5 1
- * 6 0
- * 7 0
+ * 6 1
+ * 7 1
* 8 1
- * 9+ 0
+ * 9+ 2
*/
#define none 0xffff
static const uint16_t map[] = {
- 0x0044, none, none, none, none, 0x002b, 0x0040, 0x0061,
- 0x0049, 0x0022, 0x0056, none, none, none, none, none,
- none, none, none, 0x0067, none, none, none, none,
- 0x0052, 0x0097, 0x0058, 0x004c, none, 0x0069, 0x00a5, none,
+ 0x0044, none, none, none, 0x0096, 0x002b, 0x0040, 0x0061,
+ 0x0049, 0x0022, 0x0056, none, none, none, 0x0095, none,
+ none, none, none, 0x0067, none, none, none, 0x0099,
+ 0x0052, 0x009d, 0x0058, 0x004c, none, 0x0069, none, none,
none, none, 0x0054, none, 0x0014, 0x005b, 0x0070, 0x0002,
- 0x007c, none, 0x001c, 0x002f, none, none, 0x0077, 0x0018,
- 0x004b, 0x002a, none, 0x0008, 0x0065, 0x0080, 0x006d, 0x0053,
- none, 0x009f, 0x004d, 0x0090, 0x0024, 0x00a0, 0x005e, 0x000b,
- 0x0088, 0x0091, none, 0x00a6, 0x005c, 0x0033, none, none,
- 0x0087, 0x003f, 0x001f, 0x002c, 0x0082, 0x005a, none, none,
- 0x0099, 0x0019, 0x0046, 0x003a, none, none, 0x0034, none,
- 0x0051, none, none, 0x0020, 0x009b, 0x0066, 0x0075, none,
- none, none, 0x0035, 0x001e, 0x006f, 0x0060, 0x0047, 0x000a,
- 0x0023, none, none, 0x006b, none, 0x0041, 0x0028, none,
- 0x0068, none, 0x00a1, 0x003e, 0x0048, 0x007b, 0x0055, none,
- none, 0x0045, 0x006e, 0x0084, none, 0x0089, 0x000e, 0x0030,
- none, 0x0027, 0x0081, none, 0x005d, 0x008a, 0x0003, 0x008f,
- none, 0x0063, 0x0006, none, 0x0093, 0x00a3, none, none,
+ 0x007c, none, 0x001e, 0x002f, none, none, 0x0077, 0x0018,
+ 0x004b, 0x002a, none, 0x0003, 0x0065, 0x0080, 0x006d, 0x0053,
+ none, none, 0x004d, 0x0090, 0x0024, none, 0x005e, 0x000b,
+ 0x0088, 0x0091, none, none, 0x005c, 0x0033, none, 0x00a8,
+ 0x0087, 0x003f, 0x001d, 0x002c, 0x0082, 0x005a, 0x00a2, none,
+ none, 0x0019, 0x0046, 0x003a, 0x0093, 0x00a1, 0x0034, none,
+ 0x0051, none, none, 0x0020, none, 0x0066, 0x0075, none,
+ none, 0x00a3, 0x0035, 0x001f, 0x006f, 0x0060, 0x0047, 0x0005,
+ 0x0023, 0x00a6, none, 0x006b, none, 0x0041, 0x0028, none,
+ 0x0068, none, none, 0x003e, 0x0048, 0x007b, 0x0055, 0x00a5,
+ none, 0x0045, 0x006e, 0x0084, none, 0x0089, 0x000d, 0x0030,
+ none, 0x0027, 0x0081, 0x009a, 0x005d, 0x008a, 0x000a, 0x008f,
+ none, 0x0063, 0x0007, none, 0x0098, 0x0097, none, none,
none, 0x0059, 0x0026, none, 0x003c, none, 0x0037, none,
- 0x0009, 0x0038, 0x0011, none, 0x0072, 0x0016, none, 0x003d,
- none, 0x006a, 0x003b, none, 0x004a, 0x0013, 0x0000, 0x007a,
- 0x002e, 0x0071, none, 0x0096, 0x0074, 0x0004, 0x004f, 0x0029,
- 0x00a4, 0x004e, 0x0095, 0x0031, 0x00a2, 0x001b, none, 0x0073,
+ 0x0004, 0x0038, 0x0011, none, 0x0072, 0x0016, none, 0x003d,
+ 0x00a4, 0x006a, 0x003b, none, 0x004a, 0x0013, 0x0000, 0x007a,
+ 0x002e, 0x0071, none, 0x009c, 0x0074, 0x0009, 0x004f, 0x0029,
+ none, 0x004e, 0x009b, 0x0031, none, 0x001b, none, 0x0073,
0x005f, 0x0032, 0x0078, 0x008e, none, none, none, 0x006c,
- 0x009a, none, 0x0036, none, 0x0050, 0x009c, 0x007d, none,
- 0x008c, 0x0005, 0x001a, 0x000c, 0x0098, 0x00a7, 0x0092, none,
- none, 0x008d, 0x0094, 0x0015, 0x0083, 0x0043, none, none,
- 0x000d, none, 0x0007, none, 0x0025, 0x007f, 0x001d, none,
- 0x0076, 0x009d, 0x0064, 0x0085, none, none, none, 0x000f,
- 0x007e, none, 0x009e, 0x0017, 0x0012, 0x0010, none, 0x0021,
+ none, none, 0x0036, none, 0x0050, 0x009f, 0x007d, none,
+ 0x008c, 0x0006, 0x001a, 0x000c, 0x009e, 0x0094, 0x0092, none,
+ none, 0x008d, 0x00a0, 0x0015, 0x0083, 0x0043, none, none,
+ 0x000f, 0x00a7, 0x0008, none, 0x0025, 0x007f, 0x001c, none,
+ 0x0076, none, 0x0064, 0x0085, none, none, none, 0x0010,
+ 0x007e, none, none, 0x0017, 0x0012, 0x000e, none, 0x0021,
0x008b, 0x0079, 0x0001, none, none, 0x002d, none, none,
none, 0x0086, none, 0x0062, none, 0x0057, 0x0042, 0x0039,
};
diff --git a/lib/mesa/src/amd/vulkan/radv_entrypoints.h b/lib/mesa/src/amd/vulkan/radv_entrypoints.h
index daeba391b..283aa1f33 100644
--- a/lib/mesa/src/amd/vulkan/radv_entrypoints.h
+++ b/lib/mesa/src/amd/vulkan/radv_entrypoints.h
@@ -2,25 +2,25 @@
struct radv_dispatch_table {
union {
- void *entrypoints[168];
+ void *entrypoints[169];
struct {
PFN_vkCreateInstance CreateInstance;
PFN_vkDestroyInstance DestroyInstance;
PFN_vkEnumeratePhysicalDevices EnumeratePhysicalDevices;
- PFN_vkGetDeviceProcAddr GetDeviceProcAddr;
- PFN_vkGetInstanceProcAddr GetInstanceProcAddr;
- PFN_vkGetPhysicalDeviceProperties GetPhysicalDeviceProperties;
- PFN_vkGetPhysicalDeviceQueueFamilyProperties GetPhysicalDeviceQueueFamilyProperties;
- PFN_vkGetPhysicalDeviceMemoryProperties GetPhysicalDeviceMemoryProperties;
PFN_vkGetPhysicalDeviceFeatures GetPhysicalDeviceFeatures;
PFN_vkGetPhysicalDeviceFormatProperties GetPhysicalDeviceFormatProperties;
PFN_vkGetPhysicalDeviceImageFormatProperties GetPhysicalDeviceImageFormatProperties;
+ PFN_vkGetPhysicalDeviceProperties GetPhysicalDeviceProperties;
+ PFN_vkGetPhysicalDeviceQueueFamilyProperties GetPhysicalDeviceQueueFamilyProperties;
+ PFN_vkGetPhysicalDeviceMemoryProperties GetPhysicalDeviceMemoryProperties;
+ PFN_vkGetInstanceProcAddr GetInstanceProcAddr;
+ PFN_vkGetDeviceProcAddr GetDeviceProcAddr;
PFN_vkCreateDevice CreateDevice;
PFN_vkDestroyDevice DestroyDevice;
- PFN_vkEnumerateInstanceLayerProperties EnumerateInstanceLayerProperties;
PFN_vkEnumerateInstanceExtensionProperties EnumerateInstanceExtensionProperties;
- PFN_vkEnumerateDeviceLayerProperties EnumerateDeviceLayerProperties;
PFN_vkEnumerateDeviceExtensionProperties EnumerateDeviceExtensionProperties;
+ PFN_vkEnumerateInstanceLayerProperties EnumerateInstanceLayerProperties;
+ PFN_vkEnumerateDeviceLayerProperties EnumerateDeviceLayerProperties;
PFN_vkGetDeviceQueue GetDeviceQueue;
PFN_vkQueueSubmit QueueSubmit;
PFN_vkQueueWaitIdle QueueWaitIdle;
@@ -32,10 +32,10 @@ struct radv_dispatch_table {
PFN_vkFlushMappedMemoryRanges FlushMappedMemoryRanges;
PFN_vkInvalidateMappedMemoryRanges InvalidateMappedMemoryRanges;
PFN_vkGetDeviceMemoryCommitment GetDeviceMemoryCommitment;
- PFN_vkGetBufferMemoryRequirements GetBufferMemoryRequirements;
PFN_vkBindBufferMemory BindBufferMemory;
- PFN_vkGetImageMemoryRequirements GetImageMemoryRequirements;
PFN_vkBindImageMemory BindImageMemory;
+ PFN_vkGetBufferMemoryRequirements GetBufferMemoryRequirements;
+ PFN_vkGetImageMemoryRequirements GetImageMemoryRequirements;
PFN_vkGetImageSparseMemoryRequirements GetImageSparseMemoryRequirements;
PFN_vkGetPhysicalDeviceSparseImageFormatProperties GetPhysicalDeviceSparseImageFormatProperties;
PFN_vkQueueBindSparse QueueBindSparse;
@@ -151,16 +151,14 @@ struct radv_dispatch_table {
PFN_vkGetSwapchainImagesKHR GetSwapchainImagesKHR;
PFN_vkAcquireNextImageKHR AcquireNextImageKHR;
PFN_vkQueuePresentKHR QueuePresentKHR;
-#ifdef VK_USE_PLATFORM_WAYLAND_KHR
- PFN_vkCreateWaylandSurfaceKHR CreateWaylandSurfaceKHR;
-#else
- void *CreateWaylandSurfaceKHR;
-#endif
-#ifdef VK_USE_PLATFORM_WAYLAND_KHR
- PFN_vkGetPhysicalDeviceWaylandPresentationSupportKHR GetPhysicalDeviceWaylandPresentationSupportKHR;
-#else
- void *GetPhysicalDeviceWaylandPresentationSupportKHR;
-#endif
+ PFN_vkGetPhysicalDeviceDisplayPropertiesKHR GetPhysicalDeviceDisplayPropertiesKHR;
+ PFN_vkGetPhysicalDeviceDisplayPlanePropertiesKHR GetPhysicalDeviceDisplayPlanePropertiesKHR;
+ PFN_vkGetDisplayPlaneSupportedDisplaysKHR GetDisplayPlaneSupportedDisplaysKHR;
+ PFN_vkGetDisplayModePropertiesKHR GetDisplayModePropertiesKHR;
+ PFN_vkCreateDisplayModeKHR CreateDisplayModeKHR;
+ PFN_vkGetDisplayPlaneCapabilitiesKHR GetDisplayPlaneCapabilitiesKHR;
+ PFN_vkCreateDisplayPlaneSurfaceKHR CreateDisplayPlaneSurfaceKHR;
+ PFN_vkCreateSharedSwapchainsKHR CreateSharedSwapchainsKHR;
#ifdef VK_USE_PLATFORM_XLIB_KHR
PFN_vkCreateXlibSurfaceKHR CreateXlibSurfaceKHR;
#else
@@ -181,21 +179,44 @@ struct radv_dispatch_table {
#else
void *GetPhysicalDeviceXcbPresentationSupportKHR;
#endif
- PFN_vkCmdDrawIndirectCountAMD CmdDrawIndirectCountAMD;
- PFN_vkCmdDrawIndexedIndirectCountAMD CmdDrawIndexedIndirectCountAMD;
- PFN_vkGetPhysicalDeviceFeatures2KHR GetPhysicalDeviceFeatures2KHR;
- PFN_vkGetPhysicalDeviceProperties2KHR GetPhysicalDeviceProperties2KHR;
- PFN_vkGetPhysicalDeviceFormatProperties2KHR GetPhysicalDeviceFormatProperties2KHR;
- PFN_vkGetPhysicalDeviceImageFormatProperties2KHR GetPhysicalDeviceImageFormatProperties2KHR;
- PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR GetPhysicalDeviceQueueFamilyProperties2KHR;
- PFN_vkGetPhysicalDeviceMemoryProperties2KHR GetPhysicalDeviceMemoryProperties2KHR;
- PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR GetPhysicalDeviceSparseImageFormatProperties2KHR;
- PFN_vkCmdPushDescriptorSetKHR CmdPushDescriptorSetKHR;
- PFN_vkTrimCommandPoolKHR TrimCommandPoolKHR;
- PFN_vkCreateDescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplateKHR;
- PFN_vkDestroyDescriptorUpdateTemplateKHR DestroyDescriptorUpdateTemplateKHR;
- PFN_vkUpdateDescriptorSetWithTemplateKHR UpdateDescriptorSetWithTemplateKHR;
- PFN_vkCmdPushDescriptorSetWithTemplateKHR CmdPushDescriptorSetWithTemplateKHR;
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+ PFN_vkCreateWaylandSurfaceKHR CreateWaylandSurfaceKHR;
+#else
+ void *CreateWaylandSurfaceKHR;
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+ PFN_vkGetPhysicalDeviceWaylandPresentationSupportKHR GetPhysicalDeviceWaylandPresentationSupportKHR;
+#else
+ void *GetPhysicalDeviceWaylandPresentationSupportKHR;
+#endif
+#ifdef VK_USE_PLATFORM_MIR_KHR
+ PFN_vkCreateMirSurfaceKHR CreateMirSurfaceKHR;
+#else
+ void *CreateMirSurfaceKHR;
+#endif
+#ifdef VK_USE_PLATFORM_MIR_KHR
+ PFN_vkGetPhysicalDeviceMirPresentationSupportKHR GetPhysicalDeviceMirPresentationSupportKHR;
+#else
+ void *GetPhysicalDeviceMirPresentationSupportKHR;
+#endif
+#ifdef VK_USE_PLATFORM_ANDROID_KHR
+ PFN_vkCreateAndroidSurfaceKHR CreateAndroidSurfaceKHR;
+#else
+ void *CreateAndroidSurfaceKHR;
+#endif
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+ PFN_vkCreateWin32SurfaceKHR CreateWin32SurfaceKHR;
+#else
+ void *CreateWin32SurfaceKHR;
+#endif
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+ PFN_vkGetPhysicalDeviceWin32PresentationSupportKHR GetPhysicalDeviceWin32PresentationSupportKHR;
+#else
+ void *GetPhysicalDeviceWin32PresentationSupportKHR;
+#endif
+ PFN_vkCreateDebugReportCallbackEXT CreateDebugReportCallbackEXT;
+ PFN_vkDestroyDebugReportCallbackEXT DestroyDebugReportCallbackEXT;
+ PFN_vkDebugReportMessageEXT DebugReportMessageEXT;
};
};
@@ -205,20 +226,20 @@ struct radv_dispatch_table {
VkResult radv_CreateInstance(const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkInstance* pInstance);
void radv_DestroyInstance(VkInstance instance, const VkAllocationCallbacks* pAllocator);
VkResult radv_EnumeratePhysicalDevices(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices);
-PFN_vkVoidFunction radv_GetDeviceProcAddr(VkDevice device, const char* pName);
-PFN_vkVoidFunction radv_GetInstanceProcAddr(VkInstance instance, const char* pName);
-void radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties);
-void radv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties* pQueueFamilyProperties);
-void radv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties);
void radv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures);
void radv_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties);
VkResult radv_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties);
+void radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties);
+void radv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties* pQueueFamilyProperties);
+void radv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties);
+PFN_vkVoidFunction radv_GetInstanceProcAddr(VkInstance instance, const char* pName);
+PFN_vkVoidFunction radv_GetDeviceProcAddr(VkDevice device, const char* pName);
VkResult radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDevice* pDevice);
void radv_DestroyDevice(VkDevice device, const VkAllocationCallbacks* pAllocator);
-VkResult radv_EnumerateInstanceLayerProperties(uint32_t* pPropertyCount, VkLayerProperties* pProperties);
VkResult radv_EnumerateInstanceExtensionProperties(const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties);
-VkResult radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkLayerProperties* pProperties);
VkResult radv_EnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties);
+VkResult radv_EnumerateInstanceLayerProperties(uint32_t* pPropertyCount, VkLayerProperties* pProperties);
+VkResult radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkLayerProperties* pProperties);
void radv_GetDeviceQueue(VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue* pQueue);
VkResult radv_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence);
VkResult radv_QueueWaitIdle(VkQueue queue);
@@ -230,10 +251,10 @@ void radv_UnmapMemory(VkDevice device, VkDeviceMemory memory);
VkResult radv_FlushMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges);
VkResult radv_InvalidateMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges);
void radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory, VkDeviceSize* pCommittedMemoryInBytes);
-void radv_GetBufferMemoryRequirements(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements);
VkResult radv_BindBufferMemory(VkDevice device, VkBuffer buffer, VkDeviceMemory memory, VkDeviceSize memoryOffset);
-void radv_GetImageMemoryRequirements(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements);
VkResult radv_BindImageMemory(VkDevice device, VkImage image, VkDeviceMemory memory, VkDeviceSize memoryOffset);
+void radv_GetBufferMemoryRequirements(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements);
+void radv_GetImageMemoryRequirements(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements);
void radv_GetImageSparseMemoryRequirements(VkDevice device, VkImage image, uint32_t* pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements* pSparseMemoryRequirements);
void radv_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t* pPropertyCount, VkSparseImageFormatProperties* pProperties);
VkResult radv_QueueBindSparse(VkQueue queue, uint32_t bindInfoCount, const VkBindSparseInfo* pBindInfo, VkFence fence);
@@ -312,14 +333,14 @@ void radv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t
void radv_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance);
void radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride);
void radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride);
-void radv_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
+void radv_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z);
void radv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset);
void radv_CmdCopyBuffer(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferCopy* pRegions);
void radv_CmdCopyImage(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageCopy* pRegions);
void radv_CmdBlitImage(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkFilter filter);
void radv_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions);
void radv_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions);
-void radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const void* pData);
+void radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const uint32_t* pData);
void radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize size, uint32_t data);
void radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges);
void radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges);
@@ -349,12 +370,14 @@ void radv_DestroySwapchainKHR(VkDevice device, VkSwapchainKHR swapchain, const V
VkResult radv_GetSwapchainImagesKHR(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pSwapchainImageCount, VkImage* pSwapchainImages);
VkResult radv_AcquireNextImageKHR(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, VkFence fence, uint32_t* pImageIndex);
VkResult radv_QueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* pPresentInfo);
-#ifdef VK_USE_PLATFORM_WAYLAND_KHR
-VkResult radv_CreateWaylandSurfaceKHR(VkInstance instance, const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface);
-#endif // VK_USE_PLATFORM_WAYLAND_KHR
-#ifdef VK_USE_PLATFORM_WAYLAND_KHR
-VkBool32 radv_GetPhysicalDeviceWaylandPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, struct wl_display* display);
-#endif // VK_USE_PLATFORM_WAYLAND_KHR
+VkResult radv_GetPhysicalDeviceDisplayPropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPropertiesKHR* pProperties);
+VkResult radv_GetPhysicalDeviceDisplayPlanePropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPlanePropertiesKHR* pProperties);
+VkResult radv_GetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physicalDevice, uint32_t planeIndex, uint32_t* pDisplayCount, VkDisplayKHR* pDisplays);
+VkResult radv_GetDisplayModePropertiesKHR(VkPhysicalDevice physicalDevice, VkDisplayKHR display, uint32_t* pPropertyCount, VkDisplayModePropertiesKHR* pProperties);
+VkResult radv_CreateDisplayModeKHR(VkPhysicalDevice physicalDevice, VkDisplayKHR display, const VkDisplayModeCreateInfoKHR*pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDisplayModeKHR* pMode);
+VkResult radv_GetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physicalDevice, VkDisplayModeKHR mode, uint32_t planeIndex, VkDisplayPlaneCapabilitiesKHR* pCapabilities);
+VkResult radv_CreateDisplayPlaneSurfaceKHR(VkInstance instance, const VkDisplaySurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface);
+VkResult radv_CreateSharedSwapchainsKHR(VkDevice device, uint32_t swapchainCount, const VkSwapchainCreateInfoKHR* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchains);
#ifdef VK_USE_PLATFORM_XLIB_KHR
VkResult radv_CreateXlibSurfaceKHR(VkInstance instance, const VkXlibSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface);
#endif // VK_USE_PLATFORM_XLIB_KHR
@@ -367,18 +390,27 @@ VkResult radv_CreateXcbSurfaceKHR(VkInstance instance, const VkXcbSurfaceCreateI
#ifdef VK_USE_PLATFORM_XCB_KHR
VkBool32 radv_GetPhysicalDeviceXcbPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, xcb_connection_t* connection, xcb_visualid_t visual_id);
#endif // VK_USE_PLATFORM_XCB_KHR
-void radv_CmdDrawIndirectCountAMD(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride);
-void radv_CmdDrawIndexedIndirectCountAMD(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride);
-void radv_GetPhysicalDeviceFeatures2KHR(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures2KHR* pFeatures);
-void radv_GetPhysicalDeviceProperties2KHR(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties2KHR* pProperties);
-void radv_GetPhysicalDeviceFormatProperties2KHR(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties2KHR* pFormatProperties);
-VkResult radv_GetPhysicalDeviceImageFormatProperties2KHR(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceImageFormatInfo2KHR* pImageFormatInfo, VkImageFormatProperties2KHR* pImageFormatProperties);
-void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties2KHR* pQueueFamilyProperties);
-void radv_GetPhysicalDeviceMemoryProperties2KHR(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties2KHR* pMemoryProperties);
-void radv_GetPhysicalDeviceSparseImageFormatProperties2KHR(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSparseImageFormatInfo2KHR* pFormatInfo, uint32_t* pPropertyCount, VkSparseImageFormatProperties2KHR* pProperties);
-void radv_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t set, uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites);
-void radv_TrimCommandPoolKHR(VkDevice device, VkCommandPool commandPool, VkCommandPoolTrimFlagsKHR flags);
-VkResult radv_CreateDescriptorUpdateTemplateKHR(VkDevice device, const VkDescriptorUpdateTemplateCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDescriptorUpdateTemplateKHR* pDescriptorUpdateTemplate);
-void radv_DestroyDescriptorUpdateTemplateKHR(VkDevice device, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, const VkAllocationCallbacks* pAllocator);
-void radv_UpdateDescriptorSetWithTemplateKHR(VkDevice device, VkDescriptorSet descriptorSet, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, const void* pData);
-void radv_CmdPushDescriptorSetWithTemplateKHR(VkCommandBuffer commandBuffer, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, VkPipelineLayout layout, uint32_t set, const void* pData);
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+VkResult radv_CreateWaylandSurfaceKHR(VkInstance instance, const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface);
+#endif // VK_USE_PLATFORM_WAYLAND_KHR
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+VkBool32 radv_GetPhysicalDeviceWaylandPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, struct wl_display* display);
+#endif // VK_USE_PLATFORM_WAYLAND_KHR
+#ifdef VK_USE_PLATFORM_MIR_KHR
+VkResult radv_CreateMirSurfaceKHR(VkInstance instance, const VkMirSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface);
+#endif // VK_USE_PLATFORM_MIR_KHR
+#ifdef VK_USE_PLATFORM_MIR_KHR
+VkBool32 radv_GetPhysicalDeviceMirPresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, MirConnection* connection);
+#endif // VK_USE_PLATFORM_MIR_KHR
+#ifdef VK_USE_PLATFORM_ANDROID_KHR
+VkResult radv_CreateAndroidSurfaceKHR(VkInstance instance, const VkAndroidSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface);
+#endif // VK_USE_PLATFORM_ANDROID_KHR
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+VkResult radv_CreateWin32SurfaceKHR(VkInstance instance, const VkWin32SurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface);
+#endif // VK_USE_PLATFORM_WIN32_KHR
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+VkBool32 radv_GetPhysicalDeviceWin32PresentationSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex);
+#endif // VK_USE_PLATFORM_WIN32_KHR
+VkResult radv_CreateDebugReportCallbackEXT(VkInstance instance, const VkDebugReportCallbackCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugReportCallbackEXT* pCallback);
+void radv_DestroyDebugReportCallbackEXT(VkInstance instance, VkDebugReportCallbackEXT callback, const VkAllocationCallbacks* pAllocator);
+void radv_DebugReportMessageEXT(VkInstance instance, VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType, uint64_t object, size_t location, int32_t messageCode, const char* pLayerPrefix, const char* pMessage);
diff --git a/lib/mesa/src/amd/vulkan/radv_entrypoints_gen.py b/lib/mesa/src/amd/vulkan/radv_entrypoints_gen.py
index 3474c789e..a6e832a0a 100644
--- a/lib/mesa/src/amd/vulkan/radv_entrypoints_gen.py
+++ b/lib/mesa/src/amd/vulkan/radv_entrypoints_gen.py
@@ -22,27 +22,14 @@
# IN THE SOFTWARE.
#
-import sys
-import xml.etree.ElementTree as ET
-
-max_api_version = 1.0
-
-supported_extensions = [
- 'VK_AMD_draw_indirect_count',
- 'VK_NV_dedicated_allocation',
- 'VK_KHR_descriptor_update_template',
- 'VK_KHR_get_physical_device_properties2',
- 'VK_KHR_incremental_present',
- 'VK_KHR_maintenance1',
- 'VK_KHR_push_descriptor',
- 'VK_KHR_sampler_mirror_clamp_to_edge',
- 'VK_KHR_shader_draw_parameters',
- 'VK_KHR_surface',
- 'VK_KHR_swapchain',
- 'VK_KHR_wayland_surface',
- 'VK_KHR_xcb_surface',
- 'VK_KHR_xlib_surface',
-]
+import fileinput, re, sys
+
+# Each function typedef in the vulkan.h header is all on one line and matches
+# this regexp. We hope that won't change.
+
+p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? *\*PFN_vk([^(]*)\)(.*);')
+
+entrypoints = []
# We generate a static hash table for entry point lookup
# (vkGetProcAddress). We use a linear congruential generator for our hash
@@ -64,11 +51,29 @@ def hash(name):
return h
-def print_guard_start(guard):
+def get_platform_guard_macro(name):
+ if "Xlib" in name:
+ return "VK_USE_PLATFORM_XLIB_KHR"
+ elif "Xcb" in name:
+ return "VK_USE_PLATFORM_XCB_KHR"
+ elif "Wayland" in name:
+ return "VK_USE_PLATFORM_WAYLAND_KHR"
+ elif "Mir" in name:
+ return "VK_USE_PLATFORM_MIR_KHR"
+ elif "Android" in name:
+ return "VK_USE_PLATFORM_ANDROID_KHR"
+ elif "Win32" in name:
+ return "VK_USE_PLATFORM_WIN32_KHR"
+ else:
+ return None
+
+def print_guard_start(name):
+ guard = get_platform_guard_macro(name)
if guard is not None:
print "#ifdef {0}".format(guard)
-def print_guard_end(guard):
+def print_guard_end(name):
+ guard = get_platform_guard_macro(name)
if guard is not None:
print "#endif // {0}".format(guard)
@@ -82,61 +87,18 @@ elif (sys.argv[1] == "code"):
opt_code = True
sys.argv.pop()
-# Extract the entry points from the registry
-def get_entrypoints(doc, entrypoints_to_defines):
- entrypoints = []
-
- enabled_commands = set()
- for feature in doc.findall('./feature'):
- assert feature.attrib['api'] == 'vulkan'
- if float(feature.attrib['number']) > max_api_version:
- continue
-
- for command in feature.findall('./require/command'):
- enabled_commands.add(command.attrib['name'])
+# Parse the entry points in the header
- for extension in doc.findall('.extensions/extension'):
- if extension.attrib['name'] not in supported_extensions:
+i = 0
+for line in fileinput.input():
+ m = p.match(line)
+ if (m):
+ if m.group(2) == 'VoidFunction':
continue
-
- assert extension.attrib['supported'] == 'vulkan'
- for command in extension.findall('./require/command'):
- enabled_commands.add(command.attrib['name'])
-
- index = 0
- for command in doc.findall('./commands/command'):
- type = command.find('./proto/type').text
- fullname = command.find('./proto/name').text
-
- if fullname not in enabled_commands:
- continue
-
- shortname = fullname[2:]
- params = map(lambda p: "".join(p.itertext()), command.findall('./param'))
- params = ', '.join(params)
- if fullname in entrypoints_to_defines:
- guard = entrypoints_to_defines[fullname]
- else:
- guard = None
- entrypoints.append((type, shortname, params, index, hash(fullname), guard))
- index += 1
-
- return entrypoints
-
-# Maps entry points to extension defines
-def get_entrypoints_defines(doc):
- entrypoints_to_defines = {}
- extensions = doc.findall('./extensions/extension')
- for extension in extensions:
- define = extension.get('protect')
- entrypoints = extension.findall('./require/command')
- for entrypoint in entrypoints:
- fullname = entrypoint.get('name')
- entrypoints_to_defines[fullname] = define
- return entrypoints_to_defines
-
-doc = ET.parse(sys.stdin)
-entrypoints = get_entrypoints(doc, get_entrypoints_defines(doc))
+ fullname = "vk" + m.group(2)
+ h = hash(fullname)
+ entrypoints.append((m.group(1), m.group(2), m.group(3), i, h))
+ i = i + 1
# For outputting entrypoints.h we generate a radv_EntryPoint() prototype
# per entry point.
@@ -149,7 +111,8 @@ if opt_header:
print " void *entrypoints[%d];" % len(entrypoints)
print " struct {"
- for type, name, args, num, h, guard in entrypoints:
+ for type, name, args, num, h in entrypoints:
+ guard = get_platform_guard_macro(name)
if guard is not None:
print "#ifdef {0}".format(guard)
print " PFN_vk{0} {0};".format(name)
@@ -162,10 +125,10 @@ if opt_header:
print " };\n"
print "};\n"
- for type, name, args, num, h, guard in entrypoints:
- print_guard_start(guard)
- print "%s radv_%s(%s);" % (type, name, args)
- print_guard_end(guard)
+ for type, name, args, num, h in entrypoints:
+ print_guard_start(name)
+ print "%s radv_%s%s;" % (type, name, args)
+ print_guard_end(name)
exit()
@@ -211,7 +174,7 @@ static const char strings[] ="""
offsets = []
i = 0;
-for type, name, args, num, h, guard in entrypoints:
+for type, name, args, num, h in entrypoints:
print " \"vk%s\\0\"" % name
offsets.append(i)
i += 2 + len(name) + 1
@@ -220,7 +183,7 @@ print " ;"
# Now generate the table of all entry points
print "\nstatic const struct radv_entrypoint entrypoints[] = {"
-for type, name, args, num, h, guard in entrypoints:
+for type, name, args, num, h in entrypoints:
print " { %5d, 0x%08x }," % (offsets[num], h)
print "};\n"
@@ -233,20 +196,20 @@ print """
"""
for layer in [ "radv" ]:
- for type, name, args, num, h, guard in entrypoints:
- print_guard_start(guard)
- print "%s %s_%s(%s) __attribute__ ((weak));" % (type, layer, name, args)
- print_guard_end(guard)
+ for type, name, args, num, h in entrypoints:
+ print_guard_start(name)
+ print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args)
+ print_guard_end(name)
print "\nconst struct radv_dispatch_table %s_layer = {" % layer
- for type, name, args, num, h, guard in entrypoints:
- print_guard_start(guard)
+ for type, name, args, num, h in entrypoints:
+ print_guard_start(name)
print " .%s = %s_%s," % (name, layer, name)
- print_guard_end(guard)
+ print_guard_end(name)
print "};\n"
print """
-static void * __attribute__ ((noinline))
+void * __attribute__ ((noinline))
radv_resolve_entrypoint(uint32_t index)
{
return radv_layer.entrypoints[index];
@@ -259,7 +222,7 @@ radv_resolve_entrypoint(uint32_t index)
map = [none for f in xrange(hash_size)]
collisions = [0 for f in xrange(10)]
-for type, name, args, num, h, guard in entrypoints:
+for type, name, args, num, h in entrypoints:
level = 0
while map[h & hash_mask] != none:
h = h + prime_step
diff --git a/lib/mesa/src/amd/vulkan/radv_formats.c b/lib/mesa/src/amd/vulkan/radv_formats.c
index 61cc67398..fe786b3a4 100644
--- a/lib/mesa/src/amd/vulkan/radv_formats.c
+++ b/lib/mesa/src/amd/vulkan/radv_formats.c
@@ -30,7 +30,6 @@
#include "util/u_half.h"
#include "util/format_srgb.h"
-#include "util/format_r11g11b10f.h"
uint32_t radv_translate_buffer_dataformat(const struct vk_format_description *desc,
int first_non_void)
@@ -395,7 +394,7 @@ uint32_t radv_translate_color_numformat(VkFormat format,
int first_non_void)
{
unsigned ntype;
- if (first_non_void == -1 || desc->channel[first_non_void].type == VK_FORMAT_TYPE_FLOAT)
+ if (first_non_void == 4 || desc->channel[first_non_void].type == VK_FORMAT_TYPE_FLOAT)
ntype = V_028C70_NUMBER_FLOAT;
else {
ntype = V_028C70_NUMBER_UNORM;
@@ -498,7 +497,7 @@ static bool radv_is_storage_image_format_supported(struct radv_physical_device *
}
}
-static bool radv_is_buffer_format_supported(VkFormat format, bool *scaled)
+static bool radv_is_buffer_format_supported(VkFormat format)
{
const struct vk_format_description *desc = vk_format_description(format);
unsigned data_format, num_format;
@@ -510,7 +509,6 @@ static bool radv_is_buffer_format_supported(VkFormat format, bool *scaled)
num_format = radv_translate_buffer_numformat(desc,
vk_format_get_first_non_void_channel(format));
- *scaled = (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) || (num_format == V_008F0C_BUF_NUM_FORMAT_USCALED);
return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID &&
num_format != ~0;
}
@@ -537,7 +535,7 @@ bool radv_is_colorbuffer_format_supported(VkFormat format, bool *blendable)
static bool radv_is_zs_format_supported(VkFormat format)
{
- return radv_translate_dbformat(format) != V_028040_Z_INVALID || format == VK_FORMAT_S8_UINT;
+ return radv_translate_dbformat(format) != V_028040_Z_INVALID;
}
static void
@@ -548,7 +546,6 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0;
const struct vk_format_description *desc = vk_format_description(format);
bool blendable;
- bool scaled = false;
if (!desc) {
out_properties->linearTilingFeatures = linear;
out_properties->optimalTilingFeatures = tiled;
@@ -561,22 +558,18 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
}
- if (radv_is_buffer_format_supported(format, &scaled)) {
- buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
- if (!scaled)
- buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT |
- VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
+ if (radv_is_buffer_format_supported(format)) {
+ buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT |
+ VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT |
+ VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
}
if (vk_format_is_depth_or_stencil(format)) {
- if (radv_is_zs_format_supported(format)) {
+ if (radv_is_zs_format_supported(format))
tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
- tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
- tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT |
- VK_FORMAT_FEATURE_BLIT_DST_BIT;
- tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
- VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
- }
+ tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
+ tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT |
+ VK_FORMAT_FEATURE_BLIT_DST_BIT;
} else {
bool linear_sampling;
if (radv_is_sampler_format_supported(format, &linear_sampling)) {
@@ -597,15 +590,6 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
}
}
- if (tiled && util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
- tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
- VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
- }
- }
-
- if (linear && util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
- linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
- VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
}
if (format == VK_FORMAT_R32_UINT || format == VK_FORMAT_R32_SINT) {
@@ -746,6 +730,9 @@ uint32_t radv_translate_dbformat(VkFormat format)
case VK_FORMAT_D16_UNORM:
case VK_FORMAT_D16_UNORM_S8_UINT:
return V_028040_Z_16;
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ return V_028040_Z_24; /* deprecated on SI */
case VK_FORMAT_D32_SFLOAT:
case VK_FORMAT_D32_SFLOAT_S8_UINT:
return V_028040_Z_32_FLOAT;
@@ -864,10 +851,6 @@ bool radv_format_pack_clear_color(VkFormat format,
clear_vals[0] = value->uint32[0] & 0xff;
clear_vals[1] = 0;
break;
- case VK_FORMAT_R8_SINT:
- clear_vals[0] = value->int32[0] & 0xff;
- clear_vals[1] = 0;
- break;
case VK_FORMAT_R16_UINT:
clear_vals[0] = value->uint32[0] & 0xffff;
clear_vals[1] = 0;
@@ -877,11 +860,6 @@ bool radv_format_pack_clear_color(VkFormat format,
clear_vals[0] |= (value->uint32[1] & 0xff) << 8;
clear_vals[1] = 0;
break;
- case VK_FORMAT_R8G8_SINT:
- clear_vals[0] = value->int32[0] & 0xff;
- clear_vals[0] |= (value->int32[1] & 0xff) << 8;
- clear_vals[1] = 0;
- break;
case VK_FORMAT_R8G8B8A8_UINT:
clear_vals[0] = value->uint32[0] & 0xff;
clear_vals[0] |= (value->uint32[1] & 0xff) << 8;
@@ -889,13 +867,6 @@ bool radv_format_pack_clear_color(VkFormat format,
clear_vals[0] |= (value->uint32[3] & 0xff) << 24;
clear_vals[1] = 0;
break;
- case VK_FORMAT_R8G8B8A8_SINT:
- clear_vals[0] = value->int32[0] & 0xff;
- clear_vals[0] |= (value->int32[1] & 0xff) << 8;
- clear_vals[0] |= (value->int32[2] & 0xff) << 16;
- clear_vals[0] |= (value->int32[3] & 0xff) << 24;
- clear_vals[1] = 0;
- break;
case VK_FORMAT_A8B8G8R8_UINT_PACK32:
clear_vals[0] = value->uint32[0] & 0xff;
clear_vals[0] |= (value->uint32[1] & 0xff) << 8;
@@ -957,12 +928,8 @@ bool radv_format_pack_clear_color(VkFormat format,
clear_vals[1] |= ((uint16_t)util_iround(CLAMP(value->float32[3], 0.0f, 1.0f) * 0xffff)) << 16;
break;
case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
- clear_vals[0] = ((uint16_t)util_iround(CLAMP(value->float32[0], 0.0f, 1.0f) * 0x3ff)) & 0x3ff;
- clear_vals[0] |= (((uint16_t)util_iround(CLAMP(value->float32[1], 0.0f, 1.0f) * 0x3ff)) & 0x3ff) << 10;
- clear_vals[0] |= (((uint16_t)util_iround(CLAMP(value->float32[2], 0.0f, 1.0f) * 0x3ff)) & 0x3ff) << 20;
- clear_vals[0] |= (((uint16_t)util_iround(CLAMP(value->float32[3], 0.0f, 1.0f) * 0x3)) & 0x3) << 30;
- clear_vals[1] = 0;
- return true;
+ /* TODO */
+ return false;
case VK_FORMAT_R32G32_SFLOAT:
clear_vals[0] = fui(value->float32[0]);
clear_vals[1] = fui(value->float32[1]);
@@ -971,10 +938,6 @@ bool radv_format_pack_clear_color(VkFormat format,
clear_vals[1] = 0;
clear_vals[0] = fui(value->float32[0]);
break;
- case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
- clear_vals[0] = float3_to_r11g11b10f(value->float32);
- clear_vals[1] = 0;
- break;
default:
fprintf(stderr, "failed to fast clear %d\n", format);
return false;
@@ -994,18 +957,6 @@ void radv_GetPhysicalDeviceFormatProperties(
pFormatProperties);
}
-void radv_GetPhysicalDeviceFormatProperties2KHR(
- VkPhysicalDevice physicalDevice,
- VkFormat format,
- VkFormatProperties2KHR* pFormatProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
-
- radv_physical_device_get_format_properties(physical_device,
- format,
- &pFormatProperties->formatProperties);
-}
-
VkResult radv_GetPhysicalDeviceImageFormatProperties(
VkPhysicalDevice physicalDevice,
VkFormat format,
@@ -1120,20 +1071,6 @@ unsupported:
return VK_ERROR_FORMAT_NOT_SUPPORTED;
}
-VkResult radv_GetPhysicalDeviceImageFormatProperties2KHR(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceImageFormatInfo2KHR* pImageFormatInfo,
- VkImageFormatProperties2KHR *pImageFormatProperties)
-{
- return radv_GetPhysicalDeviceImageFormatProperties(physicalDevice,
- pImageFormatInfo->format,
- pImageFormatInfo->type,
- pImageFormatInfo->tiling,
- pImageFormatInfo->usage,
- pImageFormatInfo->flags,
- &pImageFormatProperties->imageFormatProperties);
-}
-
void radv_GetPhysicalDeviceSparseImageFormatProperties(
VkPhysicalDevice physicalDevice,
VkFormat format,
@@ -1147,13 +1084,3 @@ void radv_GetPhysicalDeviceSparseImageFormatProperties(
/* Sparse images are not yet supported. */
*pNumProperties = 0;
}
-
-void radv_GetPhysicalDeviceSparseImageFormatProperties2KHR(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceSparseImageFormatInfo2KHR* pFormatInfo,
- uint32_t *pPropertyCount,
- VkSparseImageFormatProperties2KHR* pProperties)
-{
- /* Sparse images are not yet supported. */
- *pPropertyCount = 0;
-}
diff --git a/lib/mesa/src/amd/vulkan/radv_image.c b/lib/mesa/src/amd/vulkan/radv_image.c
index 7cf9c6765..9649158ea 100644
--- a/lib/mesa/src/amd/vulkan/radv_image.c
+++ b/lib/mesa/src/amd/vulkan/radv_image.c
@@ -41,13 +41,6 @@ radv_choose_tiling(struct radv_device *Device,
return RADEON_SURF_MODE_LINEAR_ALIGNED;
}
- /* Textures with a very small height are recommended to be linear. */
- if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
- /* Only very thin and long 2D textures should benefit from
- * linear_aligned. */
- (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
- return RADEON_SURF_MODE_LINEAR_ALIGNED;
-
/* MSAA resources must be 2D tiled. */
if (pCreateInfo->samples > 1)
return RADEON_SURF_MODE_2D;
@@ -119,8 +112,8 @@ radv_init_surface(struct radv_device *device,
VK_IMAGE_USAGE_STORAGE_BIT)) ||
(pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) ||
(pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
- device->physical_device->rad_info.chip_class < VI ||
- create_info->scanout || (device->debug_flags & RADV_DEBUG_NO_DCC) ||
+ device->instance->physicalDevice.rad_info.chip_class < VI ||
+ create_info->scanout || !device->allow_dcc ||
!radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable))
surface->flags |= RADEON_SURF_DISABLE_DCC;
if (create_info->scanout)
@@ -130,7 +123,7 @@ radv_init_surface(struct radv_device *device,
#define ATI_VENDOR_ID 0x1002
static uint32_t si_get_bo_metadata_word1(struct radv_device *device)
{
- return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
+ return (ATI_VENDOR_ID << 16) | device->instance->physicalDevice.rad_info.pci_id;
}
static inline unsigned
@@ -185,11 +178,6 @@ radv_make_buffer_descriptor(struct radv_device *device,
state[0] = va;
state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(stride);
-
- if (device->physical_device->rad_info.chip_class < VI && stride) {
- range /= stride;
- }
-
state[2] = range;
state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
@@ -213,7 +201,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
state[1] &= C_008F14_BASE_ADDRESS_HI;
state[3] &= C_008F1C_TILING_INDEX;
- state[4] &= C_008F20_PITCH_GFX6;
+ state[4] &= C_008F20_PITCH;
state[6] &= C_008F28_COMPRESSION_EN;
assert(!(va & 255));
@@ -222,7 +210,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image, base_level,
is_stencil));
- state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
+ state[4] |= S_008F20_PITCH(pitch - 1);
if (image->surface.dcc_size && image->surface.level[first_level].dcc_enabled) {
state[6] |= S_008F28_COMPRESSION_EN(1);
@@ -309,8 +297,8 @@ si_make_texture_descriptor(struct radv_device *device,
depth = image->array_size / 6;
state[0] = 0;
- state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) |
- S_008F14_NUM_FORMAT_GFX6(num_format));
+ state[1] = (S_008F14_DATA_FORMAT(data_format) |
+ S_008F14_NUM_FORMAT(num_format));
state[2] = (S_008F18_WIDTH(width - 1) |
S_008F18_HEIGHT(height - 1));
state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
@@ -338,7 +326,7 @@ si_make_texture_descriptor(struct radv_device *device,
/* The last dword is unused by hw. The shader uses it to clear
* bits in the first dword of sampler state.
*/
- if (device->physical_device->rad_info.chip_class <= CIK && image->samples <= 1) {
+ if (device->instance->physicalDevice.rad_info.chip_class <= CIK && image->samples <= 1) {
if (first_level == last_level)
state[7] = C_008F30_MAX_ANISO_RATIO;
else
@@ -371,8 +359,8 @@ si_make_texture_descriptor(struct radv_device *device,
fmask_state[0] = va >> 8;
fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
- S_008F14_DATA_FORMAT_GFX6(fmask_format) |
- S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_UINT);
+ S_008F14_DATA_FORMAT(fmask_format) |
+ S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
fmask_state[2] = S_008F18_WIDTH(width - 1) |
S_008F18_HEIGHT(height - 1);
fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
@@ -382,13 +370,12 @@ si_make_texture_descriptor(struct radv_device *device,
S_008F1C_TILING_INDEX(image->fmask.tile_mode_index) |
S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false));
fmask_state[4] = S_008F20_DEPTH(depth - 1) |
- S_008F20_PITCH_GFX6(image->fmask.pitch_in_pixels - 1);
+ S_008F20_PITCH(image->fmask.pitch_in_pixels - 1);
fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) |
S_008F24_LAST_ARRAY(last_layer);
fmask_state[6] = 0;
fmask_state[7] = 0;
- } else if (fmask_state)
- memset(fmask_state, 0, 8 * 4);
+ }
}
static void
@@ -523,7 +510,6 @@ radv_image_alloc_fmask(struct radv_device *device,
image->fmask.offset = align64(image->size, image->fmask.alignment);
image->size = image->fmask.offset + image->fmask.size;
- image->alignment = MAX2(image->alignment, image->fmask.alignment);
}
static void
@@ -531,8 +517,8 @@ radv_image_get_cmask_info(struct radv_device *device,
struct radv_image *image,
struct radv_cmask_info *out)
{
- unsigned pipe_interleave_bytes = device->physical_device->rad_info.pipe_interleave_bytes;
- unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes;
+ unsigned pipe_interleave_bytes = device->instance->physicalDevice.rad_info.pipe_interleave_bytes;
+ unsigned num_pipes = device->instance->physicalDevice.rad_info.num_tile_pipes;
unsigned cl_width, cl_height;
switch (num_pipes) {
@@ -566,6 +552,10 @@ radv_image_get_cmask_info(struct radv_device *device,
/* Each element of CMASK is a nibble. */
unsigned slice_bytes = slice_elements / 2;
+ out->pitch = width;
+ out->height = height;
+ out->xalign = cl_width * 8;
+ out->yalign = cl_height * 8;
out->slice_tile_max = (width * height) / (128*128);
if (out->slice_tile_max)
out->slice_tile_max -= 1;
@@ -585,7 +575,6 @@ radv_image_alloc_cmask(struct radv_device *device,
/* + 8 for storing the clear values */
image->clear_value_offset = image->cmask.offset + image->cmask.size;
image->size = image->cmask.offset + image->cmask.size + 8;
- image->alignment = MAX2(image->alignment, image->cmask.alignment);
}
static void
@@ -596,24 +585,88 @@ radv_image_alloc_dcc(struct radv_device *device,
/* + 8 for storing the clear values */
image->clear_value_offset = image->dcc_offset + image->surface.dcc_size;
image->size = image->dcc_offset + image->surface.dcc_size + 8;
- image->alignment = MAX2(image->alignment, image->surface.dcc_alignment);
+}
+
+static unsigned
+radv_image_get_htile_size(struct radv_device *device,
+ struct radv_image *image)
+{
+ unsigned cl_width, cl_height, width, height;
+ unsigned slice_elements, slice_bytes, base_align;
+ unsigned num_pipes = device->instance->physicalDevice.rad_info.num_tile_pipes;
+ unsigned pipe_interleave_bytes = device->instance->physicalDevice.rad_info.pipe_interleave_bytes;
+
+ /* Overalign HTILE on P2 configs to work around GPU hangs in
+ * piglit/depthstencil-render-miplevels 585.
+ *
+ * This has been confirmed to help Kabini & Stoney, where the hangs
+ * are always reproducible. I think I have seen the test hang
+ * on Carrizo too, though it was very rare there.
+ */
+ if (device->instance->physicalDevice.rad_info.chip_class >= CIK && num_pipes < 4)
+ num_pipes = 4;
+
+ switch (num_pipes) {
+ case 1:
+ cl_width = 32;
+ cl_height = 16;
+ break;
+ case 2:
+ cl_width = 32;
+ cl_height = 32;
+ break;
+ case 4:
+ cl_width = 64;
+ cl_height = 32;
+ break;
+ case 8:
+ cl_width = 64;
+ cl_height = 64;
+ break;
+ case 16:
+ cl_width = 128;
+ cl_height = 64;
+ break;
+ default:
+ assert(0);
+ return 0;
+ }
+
+ width = align(image->surface.npix_x, cl_width * 8);
+ height = align(image->surface.npix_y, cl_height * 8);
+
+ slice_elements = (width * height) / (8 * 8);
+ slice_bytes = slice_elements * 4;
+
+ base_align = num_pipes * pipe_interleave_bytes;
+
+ image->htile.pitch = width;
+ image->htile.height = height;
+ image->htile.xalign = cl_width * 8;
+ image->htile.yalign = cl_height * 8;
+
+ return image->array_size *
+ align(slice_bytes, base_align);
}
static void
radv_image_alloc_htile(struct radv_device *device,
struct radv_image *image)
{
- if ((device->debug_flags & RADV_DEBUG_NO_HIZ) || image->levels > 1) {
- image->surface.htile_size = 0;
+ if (env_var_as_boolean("RADV_HIZ_DISABLE", false))
+ return;
+
+ image->htile.size = radv_image_get_htile_size(device, image);
+
+ if (!image->htile.size)
return;
- }
- image->htile_offset = align64(image->size, image->surface.htile_alignment);
+ image->htile.offset = align64(image->size, 32768);
/* + 8 for storing the clear values */
- image->clear_value_offset = image->htile_offset + image->surface.htile_size;
- image->size = image->clear_value_offset + 8;
- image->alignment = align64(image->alignment, image->surface.htile_alignment);
+ image->clear_value_offset = image->htile.offset + image->htile.size;
+ image->size = image->htile.offset + image->htile.size + 8;
+ image->alignment = align64(image->alignment, 32768);
}
VkResult
@@ -625,7 +678,7 @@ radv_image_create(VkDevice _device,
RADV_FROM_HANDLE(radv_device, device, _device);
const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
struct radv_image *image = NULL;
- bool can_cmask_dcc = false;
+
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
radv_assert(pCreateInfo->mipLevels > 0);
@@ -649,14 +702,6 @@ radv_image_create(VkDevice _device,
image->samples = pCreateInfo->samples;
image->tiling = pCreateInfo->tiling;
image->usage = pCreateInfo->usage;
- image->flags = pCreateInfo->flags;
-
- image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
- if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
- for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
- image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
- }
-
radv_init_surface(device, &image->surface, create_info);
device->ws->surface_init(device->ws, &image->surface);
@@ -664,18 +709,15 @@ radv_image_create(VkDevice _device,
image->size = image->surface.bo_size;
image->alignment = image->surface.bo_alignment;
- if (image->exclusive || image->queue_family_mask == 1)
- can_cmask_dcc = true;
-
if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
- image->surface.dcc_size && can_cmask_dcc)
+ image->surface.dcc_size)
radv_image_alloc_dcc(device, image);
else
image->surface.dcc_size = 0;
if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
pCreateInfo->mipLevels == 1 &&
- !image->surface.dcc_size && image->extent.depth == 1 && can_cmask_dcc)
+ !image->surface.dcc_size && image->extent.depth == 1)
radv_image_alloc_cmask(device, image);
if (image->samples > 1 && vk_format_is_color(pCreateInfo->format)) {
radv_image_alloc_fmask(device, image);
@@ -690,20 +732,6 @@ radv_image_create(VkDevice _device,
image->surface.level[0].pitch_bytes = create_info->stride;
image->surface.level[0].slice_size = create_info->stride * image->surface.level[0].nblk_y;
}
-
- if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
- image->alignment = MAX2(image->alignment, 4096);
- image->size = align64(image->size, image->alignment);
- image->offset = 0;
-
- image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
- 0, RADEON_FLAG_VIRTUAL);
- if (!image->bo) {
- vk_free2(&device->alloc, alloc, image);
- return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
- }
-
*pImage = radv_image_to_handle(image);
return VK_SUCCESS;
@@ -718,7 +746,6 @@ radv_image_view_init(struct radv_image_view *iview,
{
RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
- uint32_t blk_w;
bool is_stencil = false;
switch (image->type) {
case VK_IMAGE_TYPE_1D:
@@ -756,8 +783,6 @@ radv_image_view_init(struct radv_image_view *iview,
iview->extent.height = round_up_u32(iview->extent.height * vk_format_get_blockheight(iview->vk_format),
vk_format_get_blockheight(image->vk_format));
- assert(image->surface.blk_w % vk_format_get_blockwidth(image->vk_format) == 0);
- blk_w = image->surface.blk_w / vk_format_get_blockwidth(image->vk_format) * vk_format_get_blockwidth(iview->vk_format);
iview->base_layer = range->baseArrayLayer;
iview->layer_count = radv_get_layerCount(image, range);
iview->base_mip = range->baseMipLevel;
@@ -777,7 +802,7 @@ radv_image_view_init(struct radv_image_view *iview,
si_set_mutable_tex_desc_fields(device, image,
is_stencil ? &image->surface.stencil_level[range->baseMipLevel] : &image->surface.level[range->baseMipLevel], range->baseMipLevel,
range->baseMipLevel,
- blk_w, is_stencil, iview->descriptor);
+ image->surface.blk_w, is_stencil, iview->descriptor);
}
void radv_image_set_optimal_micro_tile_mode(struct radv_device *device,
@@ -787,7 +812,7 @@ void radv_image_set_optimal_micro_tile_mode(struct radv_device *device,
* definitions for them either. They are all 2D_TILED_THIN1 modes with
* different bpp and micro tile mode.
*/
- if (device->physical_device->rad_info.chip_class >= CIK) {
+ if (device->instance->physicalDevice.rad_info.chip_class >= CIK) {
switch (micro_tile_mode) {
case 0: /* displayable */
image->surface.tiling_index[0] = 10;
@@ -862,22 +887,11 @@ bool radv_layout_can_expclear(const struct radv_image *image,
layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
}
-bool radv_layout_can_fast_clear(const struct radv_image *image,
- VkImageLayout layout,
- unsigned queue_mask)
+bool radv_layout_has_cmask(const struct radv_image *image,
+ VkImageLayout layout)
{
- return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
- queue_mask == (1u << RADV_QUEUE_GENERAL);
-}
-
-
-unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
-{
- if (!image->exclusive)
- return image->queue_family_mask;
- if (family == VK_QUEUE_FAMILY_IGNORED)
- return 1u << queue_family;
- return 1u << family;
+ return (layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL ||
+ layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
}
VkResult
@@ -900,15 +914,11 @@ radv_DestroyImage(VkDevice _device, VkImage _image,
const VkAllocationCallbacks *pAllocator)
{
RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_image, image, _image);
- if (!image)
+ if (!_image)
return;
- if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
- device->ws->buffer_destroy(image->bo);
-
- vk_free2(&device->alloc, pAllocator, image);
+ vk_free2(&device->alloc, pAllocator, radv_image_from_handle(_image));
}
void radv_GetImageSubresourceLayout(
diff --git a/lib/mesa/src/amd/vulkan/radv_meta.c b/lib/mesa/src/amd/vulkan/radv_meta.c
index fac0dcf6d..04fa247dd 100644
--- a/lib/mesa/src/amd/vulkan/radv_meta.c
+++ b/lib/mesa/src/amd/vulkan/radv_meta.c
@@ -51,10 +51,8 @@ void
radv_meta_restore(const struct radv_meta_saved_state *state,
struct radv_cmd_buffer *cmd_buffer)
{
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
- radv_pipeline_to_handle(state->old_pipeline));
-
- cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
+ cmd_buffer->state.pipeline = state->old_pipeline;
+ radv_bind_descriptor_set(cmd_buffer, state->old_descriptor_set0, 0);
memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings,
sizeof(state->old_vertex_bindings));
@@ -112,8 +110,7 @@ radv_meta_restore_compute(const struct radv_meta_saved_compute_state *state,
{
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
radv_pipeline_to_handle(state->old_pipeline));
-
- cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
+ radv_bind_descriptor_set(cmd_buffer, state->old_descriptor_set0, 0);
if (push_constant_size) {
memcpy(cmd_buffer->push_constants, state->push_constants, push_constant_size);
@@ -327,10 +324,6 @@ radv_device_init_meta(struct radv_device *device)
if (result != VK_SUCCESS)
goto fail_buffer;
- result = radv_device_init_meta_query_state(device);
- if (result != VK_SUCCESS)
- goto fail_query;
-
result = radv_device_init_meta_fast_clear_flush_state(device);
if (result != VK_SUCCESS)
goto fail_fast_clear;
@@ -344,8 +337,6 @@ fail_resolve_compute:
radv_device_finish_meta_fast_clear_flush_state(device);
fail_fast_clear:
radv_device_finish_meta_buffer_state(device);
-fail_query:
- radv_device_finish_meta_query_state(device);
fail_buffer:
radv_device_finish_meta_depth_decomp_state(device);
fail_depth_decomp:
@@ -372,7 +363,6 @@ radv_device_finish_meta(struct radv_device *device)
radv_device_finish_meta_blit2d_state(device);
radv_device_finish_meta_bufimage_state(device);
radv_device_finish_meta_depth_decomp_state(device);
- radv_device_finish_meta_query_state(device);
radv_device_finish_meta_buffer_state(device);
radv_device_finish_meta_fast_clear_flush_state(device);
radv_device_finish_meta_resolve_compute_state(device);
diff --git a/lib/mesa/src/amd/vulkan/radv_meta.h b/lib/mesa/src/amd/vulkan/radv_meta.h
index 6cfc6134c..97d020cea 100644
--- a/lib/mesa/src/amd/vulkan/radv_meta.h
+++ b/lib/mesa/src/amd/vulkan/radv_meta.h
@@ -85,9 +85,6 @@ void radv_device_finish_meta_blit2d_state(struct radv_device *device);
VkResult radv_device_init_meta_buffer_state(struct radv_device *device);
void radv_device_finish_meta_buffer_state(struct radv_device *device);
-VkResult radv_device_init_meta_query_state(struct radv_device *device);
-void radv_device_finish_meta_query_state(struct radv_device *device);
-
VkResult radv_device_init_meta_resolve_compute_state(struct radv_device *device);
void radv_device_finish_meta_resolve_compute_state(struct radv_device *device);
void radv_meta_save(struct radv_meta_saved_state *state,
@@ -162,34 +159,13 @@ void radv_meta_begin_bufimage(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_saved_compute_state *save);
void radv_meta_end_bufimage(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_saved_compute_state *save);
-void radv_meta_begin_itoi(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save);
-void radv_meta_end_itoi(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save);
-void radv_meta_begin_cleari(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save);
-void radv_meta_end_cleari(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save);
+
void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_surf *src,
struct radv_meta_blit2d_buffer *dst,
unsigned num_rects,
struct radv_meta_blit2d_rect *rects);
-void radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_buffer *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects);
-void radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects);
-void radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *dst,
- const VkClearColorValue *clear_color);
-
void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
VkImageSubresourceRange *subresourceRange);
@@ -197,8 +173,7 @@ void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
VkImageSubresourceRange *subresourceRange);
void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange);
+ struct radv_image *image);
void radv_meta_save_graphics_reset_vport_scissor(struct radv_meta_saved_state *saved_state,
struct radv_cmd_buffer *cmd_buffer);
@@ -211,9 +186,6 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
uint32_t region_count,
const VkImageResolve *regions);
-void radv_blit_to_prime_linear(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- struct radv_image *linear_image);
#ifdef __cplusplus
}
#endif
diff --git a/lib/mesa/src/amd/vulkan/radv_meta_blit.c b/lib/mesa/src/amd/vulkan/radv_meta_blit.c
index a3256ab05..dfba8a897 100644
--- a/lib/mesa/src/amd/vulkan/radv_meta_blit.c
+++ b/lib/mesa/src/amd/vulkan/radv_meta_blit.c
@@ -38,7 +38,7 @@ build_nir_vertex_shader(void)
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "meta_blit_vs");
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec4, "a_pos");
@@ -70,7 +70,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
sprintf(shader_name, "meta_blit_fs.%d", tex_dim);
- b.shader->info->name = ralloc_strdup(b.shader, shader_name);
+ b.shader->info.name = ralloc_strdup(b.shader, shader_name);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec4, "v_tex_pos");
@@ -124,7 +124,7 @@ build_nir_copy_fragment_shader_depth(enum glsl_sampler_dim tex_dim)
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
sprintf(shader_name, "meta_blit_depth_fs.%d", tex_dim);
- b.shader->info->name = ralloc_strdup(b.shader, shader_name);
+ b.shader->info.name = ralloc_strdup(b.shader, shader_name);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec4, "v_tex_pos");
@@ -178,7 +178,7 @@ build_nir_copy_fragment_shader_stencil(enum glsl_sampler_dim tex_dim)
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
sprintf(shader_name, "meta_blit_stencil_fs.%d", tex_dim);
- b.shader->info->name = ralloc_strdup(b.shader, shader_name);
+ b.shader->info.name = ralloc_strdup(b.shader, shader_name);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec4, "v_tex_pos");
@@ -226,13 +226,12 @@ static void
meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *src_image,
struct radv_image_view *src_iview,
- VkOffset3D src_offset_0,
- VkOffset3D src_offset_1,
+ VkOffset3D src_offset,
+ VkExtent3D src_extent,
struct radv_image *dest_image,
struct radv_image_view *dest_iview,
- VkOffset3D dest_offset_0,
- VkOffset3D dest_offset_1,
- VkRect2D dest_box,
+ VkOffset3D dest_offset,
+ VkExtent3D dest_extent,
VkFilter blit_filter)
{
struct radv_device *device = cmd_buffer->device;
@@ -246,37 +245,38 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
unsigned vb_size = 3 * sizeof(*vb_data);
vb_data[0] = (struct blit_vb_data) {
.pos = {
- -1.0,
- -1.0,
+ dest_offset.x,
+ dest_offset.y,
},
.tex_coord = {
- (float)src_offset_0.x / (float)src_iview->extent.width,
- (float)src_offset_0.y / (float)src_iview->extent.height,
- (float)src_offset_0.z / (float)src_iview->extent.depth,
+ (float)(src_offset.x) / (float)src_iview->extent.width,
+ (float)(src_offset.y) / (float)src_iview->extent.height,
+ (float)src_offset.z / (float)src_iview->extent.depth,
},
};
vb_data[1] = (struct blit_vb_data) {
.pos = {
- -1.0,
- 1.0,
+ dest_offset.x,
+ dest_offset.y + dest_extent.height,
},
.tex_coord = {
- (float)src_offset_0.x / (float)src_iview->extent.width,
- (float)src_offset_1.y / (float)src_iview->extent.height,
- (float)src_offset_0.z / (float)src_iview->extent.depth,
+ (float)src_offset.x / (float)src_iview->extent.width,
+ (float)(src_offset.y + src_extent.height) /
+ (float)src_iview->extent.height,
+ (float)src_offset.z / (float)src_iview->extent.depth,
},
};
vb_data[2] = (struct blit_vb_data) {
.pos = {
- 1.0,
- -1.0,
+ dest_offset.x + dest_extent.width,
+ dest_offset.y,
},
.tex_coord = {
- (float)src_offset_1.x / (float)src_iview->extent.width,
- (float)src_offset_0.y / (float)src_iview->extent.height,
- (float)src_offset_0.z / (float)src_iview->extent.depth,
+ (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width,
+ (float)src_offset.y / (float)src_iview->extent.height,
+ (float)src_offset.z / (float)src_iview->extent.depth,
},
};
radv_cmd_buffer_upload_data(cmd_buffer, vb_size, 16, vb_data, &offset);
@@ -307,6 +307,31 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
}, &cmd_buffer->pool->alloc, &sampler);
+ VkDescriptorSet set;
+ radv_temp_descriptor_set_create(cmd_buffer->device, cmd_buffer,
+ device->meta_state.blit.ds_layout,
+ &set);
+
+ radv_UpdateDescriptorSets(radv_device_to_handle(device),
+ 1, /* writeCount */
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = set,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = (VkDescriptorImageInfo[]) {
+ {
+ .sampler = sampler,
+ .imageView = radv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }
+ }
+ }, 0, NULL);
+
VkFramebuffer fb;
radv_CreateFramebuffer(radv_device_to_handle(device),
&(VkFramebufferCreateInfo) {
@@ -330,8 +355,8 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
.renderPass = device->meta_state.blit.render_pass[fs_key],
.framebuffer = fb,
.renderArea = {
- .offset = { dest_box.offset.x, dest_box.offset.y },
- .extent = { dest_box.extent.width, dest_box.extent.height },
+ .offset = { dest_offset.x, dest_offset.y },
+ .extent = { dest_extent.width, dest_extent.height },
},
.clearValueCount = 0,
.pClearValues = NULL,
@@ -358,8 +383,8 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
.renderPass = device->meta_state.blit.depth_only_rp,
.framebuffer = fb,
.renderArea = {
- .offset = { dest_box.offset.x, dest_box.offset.y },
- .extent = { dest_box.extent.width, dest_box.extent.height },
+ .offset = { dest_offset.x, dest_offset.y },
+ .extent = { dest_extent.width, dest_extent.height },
},
.clearValueCount = 0,
.pClearValues = NULL,
@@ -385,9 +410,9 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
.renderPass = device->meta_state.blit.stencil_only_rp,
.framebuffer = fb,
.renderArea = {
- .offset = { dest_box.offset.x, dest_box.offset.y },
- .extent = { dest_box.extent.width, dest_box.extent.height },
- },
+ .offset = { dest_offset.x, dest_offset.y },
+ .extent = { dest_extent.width, dest_extent.height },
+ },
.clearValueCount = 0,
.pClearValues = NULL,
}, VK_SUBPASS_CONTENTS_INLINE);
@@ -414,43 +439,10 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
}
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
- device->meta_state.blit.pipeline_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = sampler,
- .imageView = radv_image_view_to_handle(src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = dest_offset_0.x,
- .y = dest_offset_0.y,
- .width = dest_offset_1.x - dest_offset_0.x,
- .height = dest_offset_1.y - dest_offset_0.y,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = (VkOffset2D) { MIN2(dest_offset_0.x, dest_offset_1.x), MIN2(dest_offset_0.y, dest_offset_1.y) },
- .extent = (VkExtent2D) {
- abs(dest_offset_1.x - dest_offset_0.x),
- abs(dest_offset_1.y - dest_offset_0.y)
- },
- });
+ radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.blit.pipeline_layout, 0, 1,
+ &set, 0, NULL);
radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
@@ -462,32 +454,13 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
/* TODO: above comment is not valid for at least descriptor sets/pools,
* as we may not free them till after execution finishes. Check others. */
+ radv_temp_descriptor_set_destroy(cmd_buffer->device, set);
radv_DestroySampler(radv_device_to_handle(device), sampler,
&cmd_buffer->pool->alloc);
radv_DestroyFramebuffer(radv_device_to_handle(device), fb,
&cmd_buffer->pool->alloc);
}
-static bool
-flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
-{
- bool flip = false;
- if (*src0 > *src1) {
- unsigned tmp = *src0;
- *src0 = *src1;
- *src1 = tmp;
- flip = !flip;
- }
-
- if (*dst0 > *dst1) {
- unsigned tmp = *dst0;
- *dst0 = *dst1;
- *dst1 = tmp;
- flip = !flip;
- }
- return flip;
-}
-
void radv_CmdBlitImage(
VkCommandBuffer commandBuffer,
VkImage srcImage,
@@ -515,8 +488,6 @@ void radv_CmdBlitImage(
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
for (unsigned r = 0; r < regionCount; r++) {
- const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
- const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;
struct radv_image_view src_iview;
radv_image_view_init(&src_iview, cmd_buffer->device,
&(VkImageViewCreateInfo) {
@@ -525,92 +496,59 @@ void radv_CmdBlitImage(
.viewType = radv_meta_get_view_type(src_image),
.format = src_image->vk_format,
.subresourceRange = {
- .aspectMask = src_res->aspectMask,
- .baseMipLevel = src_res->mipLevel,
+ .aspectMask = pRegions[r].srcSubresource.aspectMask,
+ .baseMipLevel = pRegions[r].srcSubresource.mipLevel,
.levelCount = 1,
- .baseArrayLayer = src_res->baseArrayLayer,
+ .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer,
.layerCount = 1
},
},
cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT);
- unsigned dst_start, dst_end;
- if (dest_image->type == VK_IMAGE_TYPE_3D) {
- assert(dst_res->baseArrayLayer == 0);
- dst_start = pRegions[r].dstOffsets[0].z;
- dst_end = pRegions[r].dstOffsets[1].z;
- } else {
- dst_start = dst_res->baseArrayLayer;
- dst_end = dst_start + dst_res->layerCount;
- }
+ if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x ||
+ pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y ||
+ pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x ||
+ pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y)
+ radv_finishme("FINISHME: Allow flipping in blits");
- unsigned src_start, src_end;
- if (src_image->type == VK_IMAGE_TYPE_3D) {
- assert(src_res->baseArrayLayer == 0);
- src_start = pRegions[r].srcOffsets[0].z;
- src_end = pRegions[r].srcOffsets[1].z;
- } else {
- src_start = src_res->baseArrayLayer;
- src_end = src_start + src_res->layerCount;
- }
-
- bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
- float src_z_step = (float)(src_end + 1 - src_start) /
- (float)(dst_end + 1 - dst_start);
-
- if (flip_z) {
- src_start = src_end;
- src_z_step *= -1;
- }
+ const VkExtent3D dest_extent = {
+ .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x,
+ .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y,
+ .depth = 1,
+ };
- unsigned src_x0 = pRegions[r].srcOffsets[0].x;
- unsigned src_x1 = pRegions[r].srcOffsets[1].x;
- unsigned dst_x0 = pRegions[r].dstOffsets[0].x;
- unsigned dst_x1 = pRegions[r].dstOffsets[1].x;
+ const VkExtent3D src_extent = {
+ .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x,
+ .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y,
+ .depth = pRegions[r].srcOffsets[1].z - pRegions[r].srcOffsets[0].z,
+ };
- unsigned src_y0 = pRegions[r].srcOffsets[0].y;
- unsigned src_y1 = pRegions[r].srcOffsets[1].y;
- unsigned dst_y0 = pRegions[r].dstOffsets[0].y;
- unsigned dst_y1 = pRegions[r].dstOffsets[1].y;
- VkRect2D dest_box;
- dest_box.offset.x = MIN2(dst_x0, dst_x1);
- dest_box.offset.y = MIN2(dst_y0, dst_y1);
- dest_box.extent.width = abs(dst_x1 - dst_x0);
- dest_box.extent.height = abs(dst_y1 - dst_y0);
+ if (pRegions[r].srcSubresource.layerCount > 1)
+ radv_finishme("FINISHME: copy multiple array layers");
struct radv_image_view dest_iview;
unsigned usage;
- if (dst_res->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT)
+ if (pRegions[r].dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT)
usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
else
usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
- const unsigned num_layers = dst_end - dst_start;
- for (unsigned i = 0; i < num_layers; i++) {
- const VkOffset3D dest_offset_0 = {
- .x = dst_x0,
- .y = dst_y0,
- .z = dst_start + i ,
- };
- const VkOffset3D dest_offset_1 = {
- .x = dst_x1,
- .y = dst_y1,
- .z = dst_start + i ,
- };
- VkOffset3D src_offset_0 = {
- .x = src_x0,
- .y = src_y0,
- .z = src_start + i * src_z_step,
+ for (unsigned i = pRegions[r].dstOffsets[0].z; i < pRegions[r].dstOffsets[1].z; i++) {
+
+ const VkOffset3D dest_offset = {
+ .x = pRegions[r].dstOffsets[0].x,
+ .y = pRegions[r].dstOffsets[0].y,
+ .z = i,
};
- VkOffset3D src_offset_1 = {
- .x = src_x1,
- .y = src_y1,
- .z = src_start + i * src_z_step,
+ VkOffset3D src_offset = {
+ .x = pRegions[r].srcOffsets[0].x,
+ .y = pRegions[r].srcOffsets[0].y,
+ .z = i,
};
const uint32_t dest_array_slice =
- radv_meta_get_iview_layer(dest_image, dst_res,
- &dest_offset_0);
+ radv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource,
+ &dest_offset);
radv_image_view_init(&dest_iview, cmd_buffer->device,
&(VkImageViewCreateInfo) {
@@ -619,8 +557,8 @@ void radv_CmdBlitImage(
.viewType = radv_meta_get_view_type(dest_image),
.format = dest_image->vk_format,
.subresourceRange = {
- .aspectMask = dst_res->aspectMask,
- .baseMipLevel = dst_res->mipLevel,
+ .aspectMask = pRegions[r].dstSubresource.aspectMask,
+ .baseMipLevel = pRegions[r].dstSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer = dest_array_slice,
.layerCount = 1
@@ -629,10 +567,9 @@ void radv_CmdBlitImage(
cmd_buffer, usage);
meta_emit_blit(cmd_buffer,
src_image, &src_iview,
- src_offset_0, src_offset_1,
+ src_offset, src_extent,
dest_image, &dest_iview,
- dest_offset_0, dest_offset_1,
- dest_box,
+ dest_offset, dest_extent,
filter);
}
}
@@ -820,8 +757,8 @@ radv_device_init_meta_blit_color(struct radv_device *device,
},
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
+ .viewportCount = 0,
+ .scissorCount = 0,
},
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
@@ -849,10 +786,8 @@ radv_device_init_meta_blit_color(struct radv_device *device,
},
.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 4,
+ .dynamicStateCount = 2,
.pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_LINE_WIDTH,
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
},
@@ -999,8 +934,8 @@ radv_device_init_meta_blit_depth(struct radv_device *device,
},
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
+ .viewportCount = 0,
+ .scissorCount = 0,
},
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
@@ -1028,10 +963,8 @@ radv_device_init_meta_blit_depth(struct radv_device *device,
},
.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 9,
+ .dynamicStateCount = 7,
.pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_LINE_WIDTH,
VK_DYNAMIC_STATE_DEPTH_BIAS,
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
@@ -1180,8 +1113,8 @@ radv_device_init_meta_blit_stencil(struct radv_device *device,
},
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
+ .viewportCount = 0,
+ .scissorCount = 0,
},
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
@@ -1229,10 +1162,8 @@ radv_device_init_meta_blit_stencil(struct radv_device *device,
.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 6,
+ .dynamicStateCount = 4,
.pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_LINE_WIDTH,
VK_DYNAMIC_STATE_DEPTH_BIAS,
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
@@ -1289,7 +1220,6 @@ radv_device_init_meta_blit_state(struct radv_device *device)
VkDescriptorSetLayoutCreateInfo ds_layout_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 1,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
diff --git a/lib/mesa/src/amd/vulkan/radv_meta_blit2d.c b/lib/mesa/src/amd/vulkan/radv_meta_blit2d.c
index f69fec8ea..52e142f68 100644
--- a/lib/mesa/src/amd/vulkan/radv_meta_blit2d.c
+++ b/lib/mesa/src/amd/vulkan/radv_meta_blit2d.c
@@ -26,7 +26,6 @@
#include "radv_meta.h"
#include "nir/nir_builder.h"
-#include "vk_format.h"
enum blit2d_dst_type {
/* We can bind this destination as a "normal" render target and render
@@ -104,6 +103,8 @@ create_bview(struct radv_cmd_buffer *cmd_buffer,
struct blit2d_src_temps {
struct radv_image_view iview;
+
+ VkDescriptorSet set;
struct radv_buffer_view bview;
};
@@ -111,28 +112,33 @@ static void
blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_surf *src_img,
struct radv_meta_blit2d_buffer *src_buf,
+ struct radv_meta_blit2d_rect *rect,
struct blit2d_src_temps *tmp,
enum blit2d_src_type src_type, VkFormat depth_format)
{
struct radv_device *device = cmd_buffer->device;
+ VkDevice vk_device = radv_device_to_handle(cmd_buffer->device);
if (src_type == BLIT2D_SRC_TYPE_BUFFER) {
create_bview(cmd_buffer, src_buf, &tmp->bview, depth_format);
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
- device->meta_state.blit2d.p_layouts[src_type],
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->bview) }
- }
- });
+ radv_temp_descriptor_set_create(cmd_buffer->device, cmd_buffer,
+ device->meta_state.blit2d.ds_layouts[src_type],
+ &tmp->set);
+
+ radv_UpdateDescriptorSets(vk_device,
+ 1, /* writeCount */
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = tmp->set,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->bview) }
+ }
+ }, 0, NULL);
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.blit2d.p_layouts[src_type],
@@ -142,27 +148,44 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
create_iview(cmd_buffer, src_img, VK_IMAGE_USAGE_SAMPLED_BIT, &tmp->iview,
depth_format);
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
- device->meta_state.blit2d.p_layouts[src_type],
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&tmp->iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
+ radv_temp_descriptor_set_create(cmd_buffer->device, cmd_buffer,
+ device->meta_state.blit2d.ds_layouts[src_type],
+ &tmp->set);
+
+ radv_UpdateDescriptorSets(vk_device,
+ 1, /* writeCount */
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = tmp->set,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]) {
+ {
+ .sampler = NULL,
+ .imageView = radv_image_view_to_handle(&tmp->iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }
+ }
+ }, 0, NULL);
+
}
+
+ radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.blit2d.p_layouts[src_type], 0, 1,
+ &tmp->set, 0, NULL);
+}
+
+static void
+blit2d_unbind_src(struct radv_cmd_buffer *cmd_buffer,
+ struct blit2d_src_temps *tmp,
+ enum blit2d_src_type src_type)
+{
+ radv_temp_descriptor_set_destroy(cmd_buffer->device, tmp->set);
}
struct blit2d_dst_temps {
@@ -261,12 +284,10 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
for (unsigned r = 0; r < num_rects; ++r) {
VkFormat depth_format = 0;
- if (dst->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
- depth_format = vk_format_stencil_only(dst->image->vk_format);
- else if (dst->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
- depth_format = vk_format_depth_only(dst->image->vk_format);
+ if (dst->aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT)
+ depth_format = dst->image->vk_format;
struct blit2d_src_temps src_temps;
- blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format);
+ blit2d_bind_src(cmd_buffer, src_img, src_buf, &rects[r], &src_temps, src_type, depth_format);
uint32_t offset = 0;
struct blit2d_dst_temps dst_temps;
@@ -282,8 +303,8 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
vb_data[0] = (struct blit_vb_data) {
.pos = {
- -1.0,
- -1.0,
+ rects[r].dst_x,
+ rects[r].dst_y,
},
.tex_coord = {
rects[r].src_x,
@@ -293,8 +314,8 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
vb_data[1] = (struct blit_vb_data) {
.pos = {
- -1.0,
- 1.0,
+ rects[r].dst_x,
+ rects[r].dst_y + rects[r].height,
},
.tex_coord = {
rects[r].src_x,
@@ -304,8 +325,8 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
vb_data[2] = (struct blit_vb_data) {
.pos = {
- 1.0,
- -1.0,
+ rects[r].dst_x + rects[r].width,
+ rects[r].dst_y,
},
.tex_coord = {
rects[r].src_x + rects[r].width,
@@ -385,28 +406,13 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
bind_stencil_pipeline(cmd_buffer, src_type);
}
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = rects[r].dst_x,
- .y = rects[r].dst_y,
- .width = rects[r].width,
- .height = rects[r].height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = (VkOffset2D) { rects[r].dst_x, rects[r].dst_y },
- .extent = (VkExtent2D) { rects[r].width, rects[r].height },
- });
-
-
-
radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
/* At the point where we emit the draw call, all data from the
* descriptor sets, etc. has been used. We are free to delete it.
*/
+ blit2d_unbind_src(cmd_buffer, &src_temps, src_type);
blit2d_unbind_dst(cmd_buffer, &dst_temps);
}
}
@@ -433,7 +439,7 @@ build_nir_vertex_shader(void)
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "meta_blit_vs");
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec4, "a_pos");
@@ -568,7 +574,7 @@ build_nir_copy_fragment_shader(struct radv_device *device,
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, name);
+ b.shader->info.name = ralloc_strdup(b.shader, name);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec2, "v_tex_pos");
@@ -578,7 +584,7 @@ build_nir_copy_fragment_shader(struct radv_device *device,
vec4, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;
- nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
+ nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
unsigned swiz[4] = { 0, 1 };
nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
@@ -597,7 +603,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device,
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, name);
+ b.shader->info.name = ralloc_strdup(b.shader, name);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec2, "v_tex_pos");
@@ -607,7 +613,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device,
vec4, "f_color");
color_out->data.location = FRAG_RESULT_DEPTH;
- nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
+ nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
unsigned swiz[4] = { 0, 1 };
nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
@@ -626,7 +632,7 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *device,
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, name);
+ b.shader->info.name = ralloc_strdup(b.shader, name);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec2, "v_tex_pos");
@@ -636,7 +642,7 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *device,
vec4, "f_color");
color_out->data.location = FRAG_RESULT_STENCIL;
- nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
+ nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
unsigned swiz[4] = { 0, 1 };
nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
@@ -790,8 +796,8 @@ blit2d_init_color_pipeline(struct radv_device *device,
},
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
+ .viewportCount = 0,
+ .scissorCount = 0,
},
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
@@ -819,10 +825,8 @@ blit2d_init_color_pipeline(struct radv_device *device,
},
.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 9,
+ .dynamicStateCount = 7,
.pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_LINE_WIDTH,
VK_DYNAMIC_STATE_DEPTH_BIAS,
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
@@ -945,8 +949,8 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
},
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
+ .viewportCount = 0,
+ .scissorCount = 0,
},
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
@@ -974,10 +978,8 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
},
.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 9,
+ .dynamicStateCount = 7,
.pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_LINE_WIDTH,
VK_DYNAMIC_STATE_DEPTH_BIAS,
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
@@ -1100,8 +1102,8 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
},
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
+ .viewportCount = 0,
+ .scissorCount = 0,
},
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
@@ -1148,10 +1150,8 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
},
.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 6,
+ .dynamicStateCount = 4,
.pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_LINE_WIDTH,
VK_DYNAMIC_STATE_DEPTH_BIAS,
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
@@ -1204,7 +1204,6 @@ radv_device_init_meta_blit2d_state(struct radv_device *device)
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
&(VkDescriptorSetLayoutCreateInfo) {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 1,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
@@ -1232,7 +1231,6 @@ radv_device_init_meta_blit2d_state(struct radv_device *device)
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
&(VkDescriptorSetLayoutCreateInfo) {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 1,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
diff --git a/lib/mesa/src/amd/vulkan/radv_meta_buffer.c b/lib/mesa/src/amd/vulkan/radv_meta_buffer.c
index 0bb926fa9..adea25e02 100644
--- a/lib/mesa/src/amd/vulkan/radv_meta_buffer.c
+++ b/lib/mesa/src/amd/vulkan/radv_meta_buffer.c
@@ -10,17 +10,17 @@ build_buffer_fill_shader(struct radv_device *dev)
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_fill");
- b.shader->info->cs.local_size[0] = 64;
- b.shader->info->cs.local_size[1] = 1;
- b.shader->info->cs.local_size[2] = 1;
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info->cs.local_size[0],
- b.shader->info->cs.local_size[1],
- b.shader->info->cs.local_size[2], 0);
+ b.shader->info.cs.local_size[0],
+ b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
@@ -60,17 +60,17 @@ build_buffer_copy_shader(struct radv_device *dev)
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_copy");
- b.shader->info->cs.local_size[0] = 64;
- b.shader->info->cs.local_size[1] = 1;
- b.shader->info->cs.local_size[2] = 1;
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info->cs.local_size[0],
- b.shader->info->cs.local_size[1],
- b.shader->info->cs.local_size[2], 0);
+ b.shader->info.cs.local_size[0],
+ b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
@@ -126,7 +126,6 @@ VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 1,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
@@ -148,7 +147,6 @@ VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 2,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
@@ -299,37 +297,46 @@ static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
struct radv_device *device = cmd_buffer->device;
uint64_t block_count = round_up_u64(size, 1024);
struct radv_meta_saved_compute_state saved_state;
+ VkDescriptorSet ds;
radv_meta_save_compute(&saved_state, cmd_buffer, 4);
+ radv_temp_descriptor_set_create(device, cmd_buffer,
+ device->meta_state.buffer.fill_ds_layout,
+ &ds);
+
struct radv_buffer dst_buffer = {
.bo = bo,
.offset = offset,
.size = size
};
+ radv_UpdateDescriptorSets(radv_device_to_handle(device),
+ 1, /* writeCount */
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = ds,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo) {
+ .buffer = radv_buffer_to_handle(&dst_buffer),
+ .offset = 0,
+ .range = size
+ }
+ }
+ }, 0, NULL);
+
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.buffer.fill_pipeline);
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.buffer.fill_p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&dst_buffer),
- .offset = 0,
- .range = size
- }
- }
- });
+ radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.buffer.fill_p_layout, 0, 1,
+ &ds, 0, NULL);
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.buffer.fill_p_layout,
@@ -338,6 +345,8 @@ static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
+ radv_temp_descriptor_set_destroy(device, ds);
+
radv_meta_restore_compute(&saved_state, cmd_buffer, 4);
}
@@ -350,9 +359,14 @@ static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
struct radv_device *device = cmd_buffer->device;
uint64_t block_count = round_up_u64(size, 1024);
struct radv_meta_saved_compute_state saved_state;
+ VkDescriptorSet ds;
radv_meta_save_compute(&saved_state, cmd_buffer, 0);
+ radv_temp_descriptor_set_create(device, cmd_buffer,
+ device->meta_state.buffer.copy_ds_layout,
+ &ds);
+
struct radv_buffer dst_buffer = {
.bo = dst_bo,
.offset = dst_offset,
@@ -365,43 +379,51 @@ static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
.size = size
};
+ radv_UpdateDescriptorSets(radv_device_to_handle(device),
+ 2, /* writeCount */
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = ds,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo) {
+ .buffer = radv_buffer_to_handle(&dst_buffer),
+ .offset = 0,
+ .range = size
+ }
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = ds,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo) {
+ .buffer = radv_buffer_to_handle(&src_buffer),
+ .offset = 0,
+ .range = size
+ }
+ }
+ }, 0, NULL);
+
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.buffer.copy_pipeline);
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.buffer.copy_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&dst_buffer),
- .offset = 0,
- .range = size
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&src_buffer),
- .offset = 0,
- .range = size
- }
- }
- });
+ radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.buffer.copy_p_layout, 0, 1,
+ &ds, 0, NULL);
+
radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
+ radv_temp_descriptor_set_destroy(device, ds);
+
radv_meta_restore_compute(&saved_state, cmd_buffer, 0);
}
@@ -489,11 +511,10 @@ void radv_CmdUpdateBuffer(
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize dataSize,
- const void* pData)
+ const uint32_t* pData)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
- bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
uint64_t words = dataSize / 4;
uint64_t va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo);
va += dstOffset + dst_buffer->offset;
@@ -501,26 +522,18 @@ void radv_CmdUpdateBuffer(
assert(!(dataSize & 3));
assert(!(va & 3));
- if (!dataSize)
- return;
-
if (dataSize < 4096) {
- si_emit_cache_flush(cmd_buffer);
-
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8);
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
- radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
- V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
+ radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_ME));
radeon_emit(cmd_buffer->cs, va);
radeon_emit(cmd_buffer->cs, va >> 32);
radeon_emit_array(cmd_buffer->cs, pData, words);
-
- radv_cmd_buffer_trace_emit(cmd_buffer);
} else {
uint32_t buf_offset;
radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset);
diff --git a/lib/mesa/src/amd/vulkan/radv_meta_bufimage.c b/lib/mesa/src/amd/vulkan/radv_meta_bufimage.c
index 09a29d2d0..287ab3f25 100644
--- a/lib/mesa/src/amd/vulkan/radv_meta_bufimage.c
+++ b/lib/mesa/src/amd/vulkan/radv_meta_bufimage.c
@@ -1,34 +1,6 @@
-/*
- * Copyright © 2016 Red Hat.
- * Copyright © 2016 Bas Nieuwenhuizen
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
#include "radv_meta.h"
#include "nir/nir_builder.h"
-/*
- * GFX queue: Compute shader implementation of image->buffer copy
- * Compute queue: implementation also of buffer->image, image->image, and image clear.
- */
-
static nir_shader *
build_nir_itob_compute_shader(struct radv_device *dev)
{
@@ -42,10 +14,10 @@ build_nir_itob_compute_shader(struct radv_device *dev)
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "meta_itob_cs");
- b.shader->info->cs.local_size[0] = 16;
- b.shader->info->cs.local_size[1] = 16;
- b.shader->info->cs.local_size[2] = 1;
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_itob_cs");
+ b.shader->info.cs.local_size[0] = 16;
+ b.shader->info.cs.local_size[1] = 16;
+ b.shader->info.cs.local_size[2] = 1;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
sampler_type, "s_tex");
input_img->data.descriptor_set = 0;
@@ -59,9 +31,9 @@ build_nir_itob_compute_shader(struct radv_device *dev)
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info->cs.local_size[0],
- b.shader->info->cs.local_size[1],
- b.shader->info->cs.local_size[2], 0);
+ b.shader->info.cs.local_size[0],
+ b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
@@ -85,7 +57,7 @@ build_nir_itob_compute_shader(struct radv_device *dev)
tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
tex->op = nir_texop_txf;
tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, 0x3));
+ tex->src[0].src = nir_src_for_ssa(img_coord);
tex->src[1].src_type = nir_tex_src_lod;
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
tex->dest_type = nir_type_float;
@@ -133,7 +105,6 @@ radv_device_init_meta_itob_state(struct radv_device *device)
*/
VkDescriptorSetLayoutCreateInfo ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 2,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
@@ -227,546 +198,10 @@ radv_device_finish_meta_itob_state(struct radv_device *device)
}
}
-static nir_shader *
-build_nir_btoi_compute_shader(struct radv_device *dev)
-{
- nir_builder b;
- const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
- false,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "meta_btoi_cs");
- b.shader->info->cs.local_size[0] = 16;
- b.shader->info->cs.local_size[1] = 16;
- b.shader->info->cs.local_size[2] = 1;
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
-
- nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
- nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info->cs.local_size[0],
- b.shader->info->cs.local_size[1],
- b.shader->info->cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
- offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
- offset->num_components = 2;
- nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
- nir_builder_instr_insert(&b, &offset->instr);
-
- nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
- stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
- stride->num_components = 1;
- nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
- nir_builder_instr_insert(&b, &stride->instr);
-
- nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
- nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
-
- nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
- tmp = nir_iadd(&b, tmp, pos_x);
-
- nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
-
- nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
- tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
- tex->op = nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
- tex->src[1].src_type = nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->dest_type = nir_type_float;
- tex->is_array = false;
- tex->coord_components = 1;
- tex->texture = nir_deref_var_create(tex, input_img);
- tex->sampler = NULL;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_ssa_def *outval = &tex->dest.ssa;
- nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
- store->src[0] = nir_src_for_ssa(img_coord);
- store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
- store->src[2] = nir_src_for_ssa(outval);
- store->variables[0] = nir_deref_var_create(store, output_img);
-
- nir_builder_instr_insert(&b, &store->instr);
- return b.shader;
-}
-
-/* Buffer to image - don't write use image accessors */
-static VkResult
-radv_device_init_meta_btoi_state(struct radv_device *device)
-{
- VkResult result;
- struct radv_shader_module cs = { .nir = NULL };
-
- zero(device->meta_state.btoi);
-
- cs.nir = build_nir_btoi_compute_shader(device);
-
- /*
- * two descriptors one for the image being sampled
- * one for the buffer being written.
- */
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.btoi.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.btoi.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 12},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.btoi.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = radv_shader_module_to_handle(&cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.btoi.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.btoi.pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- ralloc_free(cs.nir);
- return VK_SUCCESS;
-fail:
- ralloc_free(cs.nir);
- return result;
-}
-
-static void
-radv_device_finish_meta_btoi_state(struct radv_device *device)
-{
- if (device->meta_state.btoi.img_p_layout) {
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- device->meta_state.btoi.img_p_layout,
- &device->meta_state.alloc);
- }
- if (device->meta_state.btoi.img_ds_layout) {
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.btoi.img_ds_layout,
- &device->meta_state.alloc);
- }
- if (device->meta_state.btoi.pipeline) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.btoi.pipeline,
- &device->meta_state.alloc);
- }
-}
-
-static nir_shader *
-build_nir_itoi_compute_shader(struct radv_device *dev)
-{
- nir_builder b;
- const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
- false,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "meta_itoi_cs");
- b.shader->info->cs.local_size[0] = 16;
- b.shader->info->cs.local_size[1] = 16;
- b.shader->info->cs.local_size[2] = 1;
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
-
- nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
- nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info->cs.local_size[0],
- b.shader->info->cs.local_size[1],
- b.shader->info->cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
- src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
- src_offset->num_components = 2;
- nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
- nir_builder_instr_insert(&b, &src_offset->instr);
-
- nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
- dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
- dst_offset->num_components = 2;
- nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
- nir_builder_instr_insert(&b, &dst_offset->instr);
-
- nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
-
- nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
- tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
- tex->op = nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 3));
- tex->src[1].src_type = nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->dest_type = nir_type_float;
- tex->is_array = false;
- tex->coord_components = 2;
- tex->texture = nir_deref_var_create(tex, input_img);
- tex->sampler = NULL;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_ssa_def *outval = &tex->dest.ssa;
- nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
- store->src[0] = nir_src_for_ssa(dst_coord);
- store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
- store->src[2] = nir_src_for_ssa(outval);
- store->variables[0] = nir_deref_var_create(store, output_img);
-
- nir_builder_instr_insert(&b, &store->instr);
- return b.shader;
-}
-
-/* image to image - don't write use image accessors */
-static VkResult
-radv_device_init_meta_itoi_state(struct radv_device *device)
-{
- VkResult result;
- struct radv_shader_module cs = { .nir = NULL };
-
- zero(device->meta_state.itoi);
-
- cs.nir = build_nir_itoi_compute_shader(device);
-
- /*
- * two descriptors one for the image being sampled
- * one for the buffer being written.
- */
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.itoi.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.itoi.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.itoi.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = radv_shader_module_to_handle(&cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.itoi.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.itoi.pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- ralloc_free(cs.nir);
- return VK_SUCCESS;
-fail:
- ralloc_free(cs.nir);
- return result;
-}
-
-static void
-radv_device_finish_meta_itoi_state(struct radv_device *device)
-{
- if (device->meta_state.itoi.img_p_layout) {
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- device->meta_state.itoi.img_p_layout,
- &device->meta_state.alloc);
- }
- if (device->meta_state.itoi.img_ds_layout) {
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.itoi.img_ds_layout,
- &device->meta_state.alloc);
- }
- if (device->meta_state.itoi.pipeline) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.itoi.pipeline,
- &device->meta_state.alloc);
- }
-}
-
-static nir_shader *
-build_nir_cleari_compute_shader(struct radv_device *dev)
-{
- nir_builder b;
- const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
- false,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "meta_cleari_cs");
- b.shader->info->cs.local_size[0] = 16;
- b.shader->info->cs.local_size[1] = 16;
- b.shader->info->cs.local_size[2] = 1;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 0;
-
- nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
- nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info->cs.local_size[0],
- b.shader->info->cs.local_size[1],
- b.shader->info->cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
- clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
- clear_val->num_components = 4;
- nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
- nir_builder_instr_insert(&b, &clear_val->instr);
-
- nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
- store->src[0] = nir_src_for_ssa(global_id);
- store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
- store->src[2] = nir_src_for_ssa(&clear_val->dest.ssa);
- store->variables[0] = nir_deref_var_create(store, output_img);
-
- nir_builder_instr_insert(&b, &store->instr);
- return b.shader;
-}
-
-static VkResult
-radv_device_init_meta_cleari_state(struct radv_device *device)
-{
- VkResult result;
- struct radv_shader_module cs = { .nir = NULL };
-
- zero(device->meta_state.cleari);
-
- cs.nir = build_nir_cleari_compute_shader(device);
-
- /*
- * two descriptors one for the image being sampled
- * one for the buffer being written.
- */
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.cleari.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.cleari.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.cleari.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = radv_shader_module_to_handle(&cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.cleari.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.cleari.pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- ralloc_free(cs.nir);
- return VK_SUCCESS;
-fail:
- ralloc_free(cs.nir);
- return result;
-}
-
-static void
-radv_device_finish_meta_cleari_state(struct radv_device *device)
-{
- if (device->meta_state.cleari.img_p_layout) {
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- device->meta_state.cleari.img_p_layout,
- &device->meta_state.alloc);
- }
- if (device->meta_state.cleari.img_ds_layout) {
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.cleari.img_ds_layout,
- &device->meta_state.alloc);
- }
- if (device->meta_state.cleari.pipeline) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.cleari.pipeline,
- &device->meta_state.alloc);
- }
-}
-
void
radv_device_finish_meta_bufimage_state(struct radv_device *device)
{
radv_device_finish_meta_itob_state(device);
- radv_device_finish_meta_btoi_state(device);
- radv_device_finish_meta_itoi_state(device);
- radv_device_finish_meta_cleari_state(device);
}
VkResult
@@ -777,41 +212,7 @@ radv_device_init_meta_bufimage_state(struct radv_device *device)
result = radv_device_init_meta_itob_state(device);
if (result != VK_SUCCESS)
return result;
-
- result = radv_device_init_meta_btoi_state(device);
- if (result != VK_SUCCESS)
- goto fail_itob;
-
- result = radv_device_init_meta_itoi_state(device);
- if (result != VK_SUCCESS)
- goto fail_btoi;
-
- result = radv_device_init_meta_cleari_state(device);
- if (result != VK_SUCCESS)
- goto fail_itoi;
-
return VK_SUCCESS;
-fail_itoi:
- radv_device_finish_meta_itoi_state(device);
-fail_btoi:
- radv_device_finish_meta_btoi_state(device);
-fail_itob:
- radv_device_finish_meta_itob_state(device);
- return result;
-}
-
-void
-radv_meta_begin_itoi(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save)
-{
- radv_meta_save_compute(save, cmd_buffer, 16);
-}
-
-void
-radv_meta_end_itoi(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save)
-{
- radv_meta_restore_compute(save, cmd_buffer, 16);
}
void
@@ -828,20 +229,6 @@ radv_meta_end_bufimage(struct radv_cmd_buffer *cmd_buffer,
radv_meta_restore_compute(save, cmd_buffer, 12);
}
-void
-radv_meta_begin_cleari(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save)
-{
- radv_meta_save_compute(save, cmd_buffer, 16);
-}
-
-void
-radv_meta_end_cleari(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save)
-{
- radv_meta_restore_compute(save, cmd_buffer, 16);
-}
-
static void
create_iview(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_surf *surf,
@@ -886,227 +273,86 @@ create_bview(struct radv_cmd_buffer *cmd_buffer,
struct itob_temps {
struct radv_image_view src_iview;
+
struct radv_buffer_view dst_bview;
+ VkDescriptorSet set;
};
static void
-itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct itob_temps *tmp)
-{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.itob.img_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&tmp->src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->dst_bview) },
- }
- });
-}
-
-static void
-itob_bind_pipeline(struct radv_cmd_buffer *cmd_buffer)
+itob_bind_src_image(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_meta_blit2d_surf *src,
+ struct radv_meta_blit2d_rect *rect,
+ struct itob_temps *tmp)
{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.itob.pipeline;
-
- if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- }
+ create_iview(cmd_buffer, src, VK_IMAGE_USAGE_SAMPLED_BIT, &tmp->src_iview);
}
-void
-radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
- struct radv_meta_blit2d_buffer *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
-{
- struct radv_device *device = cmd_buffer->device;
- struct itob_temps temps;
-
- create_iview(cmd_buffer, src, VK_IMAGE_USAGE_SAMPLED_BIT, &temps.src_iview);
- create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &temps.dst_bview);
- itob_bind_descriptors(cmd_buffer, &temps);
-
- itob_bind_pipeline(cmd_buffer);
-
- for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[3] = {
- rects[r].src_x,
- rects[r].src_y,
- dst->pitch
- };
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.itob.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 12,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
- }
-}
-
-struct btoi_temps {
- struct radv_buffer_view src_bview;
- struct radv_image_view dst_iview;
-};
-
static void
-btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct btoi_temps *tmp)
+itob_bind_dst_buffer(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_meta_blit2d_buffer *dst,
+ struct radv_meta_blit2d_rect *rect,
+ struct itob_temps *tmp)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.btoi.img_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->src_bview) },
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&tmp->dst_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
+ create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &tmp->dst_bview);
}
static void
-btoi_bind_pipeline(struct radv_cmd_buffer *cmd_buffer)
-{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.btoi.pipeline;
-
- if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- }
-}
-
-void
-radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_buffer *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
+ struct itob_temps *tmp)
{
struct radv_device *device = cmd_buffer->device;
- struct btoi_temps temps;
-
- create_bview(cmd_buffer, src->buffer, src->offset, src->format, &temps.src_bview);
- create_iview(cmd_buffer, dst, VK_IMAGE_USAGE_STORAGE_BIT, &temps.dst_iview);
- btoi_bind_descriptors(cmd_buffer, &temps);
-
- btoi_bind_pipeline(cmd_buffer);
-
- for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[3] = {
- rects[r].dst_x,
- rects[r].dst_y,
- src->pitch
- };
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.btoi.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 12,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
- }
+ VkDevice vk_device = radv_device_to_handle(cmd_buffer->device);
+
+ radv_temp_descriptor_set_create(device, cmd_buffer,
+ device->meta_state.itob.img_ds_layout,
+ &tmp->set);
+
+ radv_UpdateDescriptorSets(vk_device,
+ 2, /* writeCount */
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = tmp->set,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]) {
+ {
+ .sampler = NULL,
+ .imageView = radv_image_view_to_handle(&tmp->src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = tmp->set,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->dst_bview) },
+ }
+ }, 0, NULL);
+
+ radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.itob.img_p_layout, 0, 1,
+ &tmp->set, 0, NULL);
}
-struct itoi_temps {
- struct radv_image_view src_iview;
- struct radv_image_view dst_iview;
-};
-
static void
-itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct itoi_temps *tmp)
+itob_unbind_src_image(struct radv_cmd_buffer *cmd_buffer,
+ struct itob_temps *temps)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.itoi.img_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&tmp->src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&tmp->dst_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
}
static void
-itoi_bind_pipeline(struct radv_cmd_buffer *cmd_buffer)
+bind_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
VkPipeline pipeline =
- cmd_buffer->device->meta_state.itoi.pipeline;
+ cmd_buffer->device->meta_state.itob.pipeline;
if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) {
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
@@ -1115,103 +361,36 @@ itoi_bind_pipeline(struct radv_cmd_buffer *cmd_buffer)
}
void
-radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_meta_blit2d_surf *src,
+ struct radv_meta_blit2d_buffer *dst,
+ unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects)
{
struct radv_device *device = cmd_buffer->device;
- struct itoi_temps temps;
- create_iview(cmd_buffer, src, VK_IMAGE_USAGE_SAMPLED_BIT, &temps.src_iview);
- create_iview(cmd_buffer, dst, VK_IMAGE_USAGE_STORAGE_BIT, &temps.dst_iview);
+ for (unsigned r = 0; r < num_rects; ++r) {
+ struct itob_temps temps;
- itoi_bind_descriptors(cmd_buffer, &temps);
+ itob_bind_src_image(cmd_buffer, src, &rects[r], &temps);
+ itob_bind_dst_buffer(cmd_buffer, dst, &rects[r], &temps);
+ itob_bind_descriptors(cmd_buffer, &temps);
- itoi_bind_pipeline(cmd_buffer);
+ bind_pipeline(cmd_buffer);
- for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[4] = {
+ unsigned push_constants[3] = {
rects[r].src_x,
rects[r].src_y,
- rects[r].dst_x,
- rects[r].dst_y,
+ dst->pitch
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.itoi.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
+ device->meta_state.itob.img_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 12,
push_constants);
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
+ radv_temp_descriptor_set_destroy(cmd_buffer->device, temps.set);
+ itob_unbind_src_image(cmd_buffer, &temps);
}
-}
-
-static void
-cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *dst_iview)
-{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.cleari.img_p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dst_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- });
-}
-
-static void
-cleari_bind_pipeline(struct radv_cmd_buffer *cmd_buffer)
-{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.cleari.pipeline;
-
- if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- }
-}
-
-void
-radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *dst,
- const VkClearColorValue *clear_color)
-{
- struct radv_device *device = cmd_buffer->device;
- struct radv_image_view dst_iview;
-
- create_iview(cmd_buffer, dst, VK_IMAGE_USAGE_STORAGE_BIT, &dst_iview);
- cleari_bind_descriptors(cmd_buffer, &dst_iview);
-
- cleari_bind_pipeline(cmd_buffer);
-
- unsigned push_constants[4] = {
- clear_color->uint32[0],
- clear_color->uint32[1],
- clear_color->uint32[2],
- clear_color->uint32[3],
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.cleari.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
- push_constants);
- radv_unaligned_dispatch(cmd_buffer, dst->image->extent.width, dst->image->extent.height, 1);
}
diff --git a/lib/mesa/src/amd/vulkan/radv_meta_clear.c b/lib/mesa/src/amd/vulkan/radv_meta_clear.c
index d06cf4eeb..a3477036e 100644
--- a/lib/mesa/src/amd/vulkan/radv_meta_clear.c
+++ b/lib/mesa/src/amd/vulkan/radv_meta_clear.c
@@ -56,8 +56,8 @@ build_color_shaders(struct nir_shader **out_vs,
nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
- vs_b.shader->info->name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
- fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");
+ vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
+ fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");
const struct glsl_type *position_type = glsl_vec4_type();
const struct glsl_type *color_type = glsl_vec4_type();
@@ -98,18 +98,6 @@ build_color_shaders(struct nir_shader **out_vs,
nir_copy_var(&vs_b, vs_out_color, vs_in_color);
nir_copy_var(&fs_b, fs_out_color, fs_in_color);
- const struct glsl_type *layer_type = glsl_int_type();
- nir_variable *vs_out_layer =
- nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
- "v_layer");
- vs_out_layer->data.location = VARYING_SLOT_LAYER;
- vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
- nir_ssa_def *inst_id = nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0);
- nir_ssa_def *base_instance = nir_load_system_value(&vs_b, nir_intrinsic_load_base_instance, 0);
-
- nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
- nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
-
*out_vs = vs_b.shader;
*out_fs = fs_b.shader;
}
@@ -161,8 +149,8 @@ create_pipeline(struct radv_device *device,
},
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
+ .viewportCount = 0,
+ .scissorCount = 0,
},
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
@@ -189,11 +177,9 @@ create_pipeline(struct radv_device *device,
* we need only restore dynamic state was vkCmdSet.
*/
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 8,
+ .dynamicStateCount = 6,
.pDynamicStates = (VkDynamicState[]) {
/* Everything except stencil write mask */
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_LINE_WIDTH,
VK_DYNAMIC_STATE_DEPTH_BIAS,
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
@@ -219,50 +205,12 @@ create_pipeline(struct radv_device *device,
}
static VkResult
-create_color_renderpass(struct radv_device *device,
- VkFormat vk_format,
- uint32_t samples,
- VkRenderPass *pass)
-{
- return radv_CreateRenderPass(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription) {
- .format = vk_format,
- .samples = samples,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription) {
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 1,
- .pColorAttachments = &(VkAttachmentReference) {
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference) {
- .attachment = VK_ATTACHMENT_UNUSED,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .preserveAttachmentCount = 1,
- .pPreserveAttachments = (uint32_t[]) { 0 },
- },
- .dependencyCount = 0,
- }, &device->meta_state.alloc, pass);
-}
-
-static VkResult
create_color_pipeline(struct radv_device *device,
+ VkFormat vk_format,
uint32_t samples,
uint32_t frag_output,
struct radv_pipeline **pipeline,
- VkRenderPass pass)
+ VkRenderPass *pass)
{
struct nir_shader *vs_nir;
struct nir_shader *fs_nir;
@@ -322,11 +270,44 @@ create_color_pipeline(struct radv_device *device,
.pAttachments = blend_attachment_state
};
+ result = radv_CreateRenderPass(radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = vk_format,
+ .samples = samples,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 1,
+ .pPreserveAttachments = (uint32_t[]) { 0 },
+ },
+ .dependencyCount = 0,
+ }, &device->meta_state.alloc, pass);
+ if (result != VK_SUCCESS)
+ return result;
struct radv_graphics_pipeline_create_info extra = {
.use_rectlist = true,
};
- result = create_pipeline(device, radv_render_pass_from_handle(pass),
+ result = create_pipeline(device, radv_render_pass_from_handle(*pass),
samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
&extra, &device->meta_state.alloc, pipeline);
@@ -365,10 +346,12 @@ radv_device_finish_meta_clear_state(struct radv_device *device)
for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
destroy_pipeline(device, state->clear[i].depth_only_pipeline[j]);
+ destroy_render_pass(device, state->clear[i].depth_only_rp[j]);
destroy_pipeline(device, state->clear[i].stencil_only_pipeline[j]);
+ destroy_render_pass(device, state->clear[i].stencil_only_rp[j]);
destroy_pipeline(device, state->clear[i].depthstencil_pipeline[j]);
+ destroy_render_pass(device, state->clear[i].depthstencil_rp[j]);
}
- destroy_render_pass(device, state->clear[i].depthstencil_rp);
}
}
@@ -412,22 +395,22 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
const struct color_clear_vattrs vertex_data[3] = {
{
.position = {
- -1.0,
- -1.0,
+ clear_rect->rect.offset.x,
+ clear_rect->rect.offset.y,
},
.color = clear_value,
},
{
.position = {
- -1.0,
- 1.0,
+ clear_rect->rect.offset.x,
+ clear_rect->rect.offset.y + clear_rect->rect.extent.height,
},
.color = clear_value,
},
{
.position = {
- 1.0,
- -1.0,
+ clear_rect->rect.offset.x + clear_rect->rect.extent.width,
+ clear_rect->rect.offset.y,
},
.color = clear_value,
},
@@ -461,18 +444,7 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
pipeline_h);
}
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = clear_rect->rect.offset.x,
- .y = clear_rect->rect.offset.y,
- .width = clear_rect->rect.extent.width,
- .height = clear_rect->rect.extent.height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
-
- radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
+ radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false);
}
@@ -486,8 +458,8 @@ build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs
nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
- vs_b.shader->info->name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
- fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
+ vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
+ fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
const struct glsl_type *position_type = glsl_vec4_type();
nir_variable *vs_in_pos =
@@ -502,64 +474,17 @@ build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs
nir_copy_var(&vs_b, vs_out_pos, vs_in_pos);
- const struct glsl_type *layer_type = glsl_int_type();
- nir_variable *vs_out_layer =
- nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
- "v_layer");
- vs_out_layer->data.location = VARYING_SLOT_LAYER;
- vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
- nir_ssa_def *inst_id = nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0);
- nir_ssa_def *base_instance = nir_load_system_value(&vs_b, nir_intrinsic_load_base_instance, 0);
-
- nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
- nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
-
*out_vs = vs_b.shader;
*out_fs = fs_b.shader;
}
static VkResult
-create_depthstencil_renderpass(struct radv_device *device,
- uint32_t samples,
- VkRenderPass *render_pass)
-{
- return radv_CreateRenderPass(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription) {
- .format = VK_FORMAT_UNDEFINED,
- .samples = samples,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription) {
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference) {
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .preserveAttachmentCount = 1,
- .pPreserveAttachments = (uint32_t[]) { 0 },
- },
- .dependencyCount = 0,
- }, &device->meta_state.alloc, render_pass);
-}
-
-static VkResult
create_depthstencil_pipeline(struct radv_device *device,
VkImageAspectFlags aspects,
uint32_t samples,
int index,
struct radv_pipeline **pipeline,
- VkRenderPass render_pass)
+ VkRenderPass *render_pass)
{
struct nir_shader *vs_nir, *fs_nir;
VkResult result;
@@ -610,6 +535,36 @@ create_depthstencil_pipeline(struct radv_device *device,
.pAttachments = NULL,
};
+ result = radv_CreateRenderPass(radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = VK_FORMAT_UNDEFINED,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 1,
+ .pPreserveAttachments = (uint32_t[]) { 0 },
+ },
+ .dependencyCount = 0,
+ }, &device->meta_state.alloc, render_pass);
+ if (result != VK_SUCCESS)
+ return result;
+
struct radv_graphics_pipeline_create_info extra = {
.use_rectlist = true,
};
@@ -622,7 +577,7 @@ create_depthstencil_pipeline(struct radv_device *device,
extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;
extra.db_stencil_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
}
- result = create_pipeline(device, radv_render_pass_from_handle(render_pass),
+ result = create_pipeline(device, radv_render_pass_from_handle(*render_pass),
samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
&extra, &device->meta_state.alloc, pipeline);
return result;
@@ -636,7 +591,7 @@ static bool depth_view_can_fast_clear(const struct radv_image_view *iview,
clear_rect->rect.extent.width != iview->extent.width ||
clear_rect->rect.extent.height != iview->extent.height)
return false;
- if (iview->image->surface.htile_size &&
+ if (iview->image->htile.size &&
iview->base_mip == 0 &&
iview->base_layer == 0 &&
radv_layout_can_expclear(iview->image, layout) &&
@@ -698,28 +653,25 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
VK_IMAGE_ASPECT_STENCIL_BIT));
assert(pass_att != VK_ATTACHMENT_UNUSED);
- if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
- clear_value.depth = 1.0f;
-
const struct depthstencil_clear_vattrs vertex_data[3] = {
{
.position = {
- -1.0,
- -1.0
+ clear_rect->rect.offset.x,
+ clear_rect->rect.offset.y,
},
.depth_clear = clear_value.depth,
},
{
.position = {
- -1.0,
- 1.0,
+ clear_rect->rect.offset.x,
+ clear_rect->rect.offset.y + clear_rect->rect.extent.height,
},
.depth_clear = clear_value.depth,
},
{
.position = {
- 1.0,
- -1.0,
+ clear_rect->rect.offset.x + clear_rect->rect.extent.width,
+ clear_rect->rect.offset.y,
},
.depth_clear = clear_value.depth,
},
@@ -757,18 +709,7 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
if (depth_view_can_fast_clear(iview, subpass->depth_stencil_attachment.layout, clear_rect))
radv_set_depth_clear_regs(cmd_buffer, iview->image, clear_value, aspects);
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = clear_rect->rect.offset.x,
- .y = clear_rect->rect.offset.y,
- .width = clear_rect->rect.extent.width,
- .height = clear_rect->rect.extent.height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
-
- radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
+ radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
}
@@ -799,32 +740,20 @@ radv_device_init_meta_clear_state(struct radv_device *device)
VkFormat format = pipeline_formats[j];
unsigned fs_key = radv_format_meta_fs_key(format);
assert(!state->clear[i].color_pipelines[fs_key]);
-
- res = create_color_renderpass(device, format, samples,
- &state->clear[i].render_pass[fs_key]);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_color_pipeline(device, samples, 0, &state->clear[i].color_pipelines[fs_key],
- state->clear[i].render_pass[fs_key]);
+ res = create_color_pipeline(device, format, samples, 0, &state->clear[i].color_pipelines[fs_key],
+ &state->clear[i].render_pass[fs_key]);
if (res != VK_SUCCESS)
goto fail;
}
- res = create_depthstencil_renderpass(device,
- samples,
- &state->clear[i].depthstencil_rp);
- if (res != VK_SUCCESS)
- goto fail;
-
for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
res = create_depthstencil_pipeline(device,
VK_IMAGE_ASPECT_DEPTH_BIT,
samples,
j,
&state->clear[i].depth_only_pipeline[j],
- state->clear[i].depthstencil_rp);
+ &state->clear[i].depth_only_rp[j]);
if (res != VK_SUCCESS)
goto fail;
@@ -833,7 +762,7 @@ radv_device_init_meta_clear_state(struct radv_device *device)
samples,
j,
&state->clear[i].stencil_only_pipeline[j],
- state->clear[i].depthstencil_rp);
+ &state->clear[i].stencil_only_rp[j]);
if (res != VK_SUCCESS)
goto fail;
@@ -843,7 +772,7 @@ radv_device_init_meta_clear_state(struct radv_device *device)
samples,
j,
&state->clear[i].depthstencil_pipeline[j],
- state->clear[i].depthstencil_rp);
+ &state->clear[i].depthstencil_rp[j]);
if (res != VK_SUCCESS)
goto fail;
}
@@ -858,9 +787,7 @@ fail:
static bool
emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
const VkClearAttachment *clear_att,
- const VkClearRect *clear_rect,
- enum radv_cmd_flush_bits *pre_flush,
- enum radv_cmd_flush_bits *post_flush)
+ const VkClearRect *clear_rect)
{
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
const uint32_t subpass_att = clear_att->colorAttachment;
@@ -875,10 +802,10 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
if (!iview->image->cmask.size && !iview->image->surface.dcc_size)
return false;
- if (cmd_buffer->device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
+ if (!cmd_buffer->device->allow_fast_clears)
return false;
- if (!radv_layout_can_fast_clear(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index)))
+ if (!radv_layout_has_cmask(iview->image, image_layout))
goto fail;
if (vk_format_get_blocksizebits(iview->image->vk_format) > 64)
goto fail;
@@ -918,13 +845,9 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
if (ret == false)
goto fail;
- if (pre_flush) {
- cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) & ~ *pre_flush;
- *pre_flush |= cmd_buffer->state.flush_bits;
- } else
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ si_emit_cache_flush(cmd_buffer);
/* clear cmask buffer */
if (iview->image->surface.dcc_size) {
radv_fill_buffer(cmd_buffer, iview->image->bo,
@@ -935,15 +858,9 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
iview->image->offset + iview->image->cmask.offset,
iview->image->cmask.size, 0);
}
-
- if (post_flush)
- *post_flush |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
- else
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_INV_VMEM_L1 |
+ RADV_CMD_FLAG_INV_GLOBAL_L2;
radv_set_color_clear_regs(cmd_buffer, iview->image, subpass_att, clear_color);
@@ -958,14 +875,11 @@ fail:
static void
emit_clear(struct radv_cmd_buffer *cmd_buffer,
const VkClearAttachment *clear_att,
- const VkClearRect *clear_rect,
- enum radv_cmd_flush_bits *pre_flush,
- enum radv_cmd_flush_bits *post_flush)
+ const VkClearRect *clear_rect)
{
if (clear_att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- if (!emit_fast_color_clear(cmd_buffer, clear_att, clear_rect,
- pre_flush, post_flush))
+ if (!emit_fast_color_clear(cmd_buffer, clear_att, clear_rect))
emit_color_clear(cmd_buffer, clear_att, clear_rect);
} else {
assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT |
@@ -1008,18 +922,19 @@ radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_cmd_state *cmd_state = &cmd_buffer->state;
struct radv_meta_saved_state saved_state;
- enum radv_cmd_flush_bits pre_flush = 0;
- enum radv_cmd_flush_bits post_flush = 0;
if (!subpass_needs_clear(cmd_buffer))
return;
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+ if (cmd_state->framebuffer->layers > 1)
+ radv_finishme("clearing multi-layer framebuffer");
+
VkClearRect clear_rect = {
.rect = cmd_state->render_area,
.baseArrayLayer = 0,
- .layerCount = cmd_state->framebuffer->layers,
+ .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
};
for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
@@ -1037,7 +952,7 @@ radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
.clearValue = cmd_state->attachments[a].clear_value,
};
- emit_clear(cmd_buffer, &clear_att, &clear_rect, &pre_flush, &post_flush);
+ emit_clear(cmd_buffer, &clear_att, &clear_rect);
cmd_state->attachments[a].pending_clear_aspects = 0;
}
@@ -1052,151 +967,23 @@ radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
.clearValue = cmd_state->attachments[ds].clear_value,
};
- emit_clear(cmd_buffer, &clear_att, &clear_rect,
- &pre_flush, &post_flush);
+ emit_clear(cmd_buffer, &clear_att, &clear_rect);
cmd_state->attachments[ds].pending_clear_aspects = 0;
}
}
radv_meta_restore(&saved_state, cmd_buffer);
- cmd_buffer->state.flush_bits |= post_flush;
}
static void
-radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout image_layout,
- const VkImageSubresourceRange *range,
- VkFormat format, int level, int layer,
- const VkClearValue *clear_val)
-{
- VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
- struct radv_image_view iview;
- radv_image_view_init(&iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = radv_meta_get_view_type(image),
- .format = format,
- .subresourceRange = {
- .aspectMask = range->aspectMask,
- .baseMipLevel = range->baseMipLevel + level,
- .levelCount = 1,
- .baseArrayLayer = range->baseArrayLayer + layer,
- .layerCount = 1
- },
- },
- cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
-
- VkFramebuffer fb;
- radv_CreateFramebuffer(device_h,
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(&iview),
- },
- .width = iview.extent.width,
- .height = iview.extent.height,
- .layers = 1
- },
- &cmd_buffer->pool->alloc,
- &fb);
-
- VkAttachmentDescription att_desc = {
- .format = iview.vk_format,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = image_layout,
- .finalLayout = image_layout,
- };
-
- VkSubpassDescription subpass_desc = {
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = NULL,
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- };
-
- const VkAttachmentReference att_ref = {
- .attachment = 0,
- .layout = image_layout,
- };
-
- if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- subpass_desc.colorAttachmentCount = 1;
- subpass_desc.pColorAttachments = &att_ref;
- } else {
- subpass_desc.pDepthStencilAttachment = &att_ref;
- }
-
- VkRenderPass pass;
- radv_CreateRenderPass(device_h,
- &(VkRenderPassCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = &att_desc,
- .subpassCount = 1,
- .pSubpasses = &subpass_desc,
- },
- &cmd_buffer->pool->alloc,
- &pass);
-
- radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderArea = {
- .offset = { 0, 0, },
- .extent = {
- .width = iview.extent.width,
- .height = iview.extent.height,
- },
- },
- .renderPass = pass,
- .framebuffer = fb,
- .clearValueCount = 0,
- .pClearValues = NULL,
- },
- VK_SUBPASS_CONTENTS_INLINE);
-
- VkClearAttachment clear_att = {
- .aspectMask = range->aspectMask,
- .colorAttachment = 0,
- .clearValue = *clear_val,
- };
-
- VkClearRect clear_rect = {
- .rect = {
- .offset = { 0, 0 },
- .extent = { iview.extent.width, iview.extent.height },
- },
- .baseArrayLayer = range->baseArrayLayer,
- .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
- };
-
- emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL);
-
- radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
- radv_DestroyRenderPass(device_h, pass,
- &cmd_buffer->pool->alloc);
- radv_DestroyFramebuffer(device_h, fb,
- &cmd_buffer->pool->alloc);
-}
-static void
radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
VkImageLayout image_layout,
const VkClearValue *clear_value,
uint32_t range_count,
- const VkImageSubresourceRange *ranges,
- bool cs)
+ const VkImageSubresourceRange *ranges)
{
+ VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
VkFormat format = image->vk_format;
VkClearValue internal_clear_value = *clear_value;
@@ -1207,14 +994,6 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
internal_clear_value.color.uint32[0] = value;
}
- if (format == VK_FORMAT_R4G4_UNORM_PACK8) {
- uint8_t r, g;
- format = VK_FORMAT_R8_UINT;
- r = float_to_ubyte(clear_value->color.float32[0]) >> 4;
- g = float_to_ubyte(clear_value->color.float32[1]) >> 4;
- internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf);
- }
-
for (uint32_t r = 0; r < range_count; r++) {
const VkImageSubresourceRange *range = &ranges[r];
for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {
@@ -1222,30 +1001,127 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
radv_minify(image->extent.depth, range->baseMipLevel + l) :
radv_get_layerCount(image, range);
for (uint32_t s = 0; s < layer_count; ++s) {
-
- if (cs) {
- struct radv_meta_blit2d_surf surf;
- surf.format = format;
- surf.image = image;
- surf.level = range->baseMipLevel + l;
- surf.layer = range->baseArrayLayer + s;
- surf.aspect_mask = range->aspectMask;
- radv_meta_clear_image_cs(cmd_buffer, &surf,
- &internal_clear_value.color);
+ struct radv_image_view iview;
+ radv_image_view_init(&iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = radv_meta_get_view_type(image),
+ .format = format,
+ .subresourceRange = {
+ .aspectMask = range->aspectMask,
+ .baseMipLevel = range->baseMipLevel + l,
+ .levelCount = 1,
+ .baseArrayLayer = range->baseArrayLayer + s,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
+
+ VkFramebuffer fb;
+ radv_CreateFramebuffer(device_h,
+ &(VkFramebufferCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkImageView[]) {
+ radv_image_view_to_handle(&iview),
+ },
+ .width = iview.extent.width,
+ .height = iview.extent.height,
+ .layers = 1
+ },
+ &cmd_buffer->pool->alloc,
+ &fb);
+
+ VkAttachmentDescription att_desc = {
+ .format = iview.vk_format,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = image_layout,
+ .finalLayout = image_layout,
+ };
+
+ VkSubpassDescription subpass_desc = {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = NULL,
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ };
+
+ const VkAttachmentReference att_ref = {
+ .attachment = 0,
+ .layout = image_layout,
+ };
+
+ if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+ subpass_desc.colorAttachmentCount = 1;
+ subpass_desc.pColorAttachments = &att_ref;
} else {
- radv_clear_image_layer(cmd_buffer, image, image_layout,
- range, format, l, s, &internal_clear_value);
+ subpass_desc.pDepthStencilAttachment = &att_ref;
}
+
+ VkRenderPass pass;
+ radv_CreateRenderPass(device_h,
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &att_desc,
+ .subpassCount = 1,
+ .pSubpasses = &subpass_desc,
+ },
+ &cmd_buffer->pool->alloc,
+ &pass);
+
+ radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
+ &(VkRenderPassBeginInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderArea = {
+ .offset = { 0, 0, },
+ .extent = {
+ .width = iview.extent.width,
+ .height = iview.extent.height,
+ },
+ },
+ .renderPass = pass,
+ .framebuffer = fb,
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ VK_SUBPASS_CONTENTS_INLINE);
+
+ VkClearAttachment clear_att = {
+ .aspectMask = range->aspectMask,
+ .colorAttachment = 0,
+ .clearValue = internal_clear_value,
+ };
+
+ VkClearRect clear_rect = {
+ .rect = {
+ .offset = { 0, 0 },
+ .extent = { iview.extent.width, iview.extent.height },
+ },
+ .baseArrayLayer = range->baseArrayLayer,
+ .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
+ };
+
+ emit_clear(cmd_buffer, &clear_att, &clear_rect);
+
+ radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
+ radv_DestroyRenderPass(device_h, pass,
+ &cmd_buffer->pool->alloc);
+ radv_DestroyFramebuffer(device_h, fb,
+ &cmd_buffer->pool->alloc);
}
}
}
}
-union meta_saved_state {
- struct radv_meta_saved_state gfx;
- struct radv_meta_saved_compute_state compute;
-};
-
void radv_CmdClearColorImage(
VkCommandBuffer commandBuffer,
VkImage image_h,
@@ -1256,22 +1132,15 @@ void radv_CmdClearColorImage(
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_image, image, image_h);
- union meta_saved_state saved_state;
- bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
+ struct radv_meta_saved_state saved_state;
- if (cs)
- radv_meta_begin_cleari(cmd_buffer, &saved_state.compute);
- else
- radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);
+ radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
radv_cmd_clear_image(cmd_buffer, image, imageLayout,
(const VkClearValue *) pColor,
- rangeCount, pRanges, cs);
+ rangeCount, pRanges);
- if (cs)
- radv_meta_end_cleari(cmd_buffer, &saved_state.compute);
- else
- radv_meta_restore(&saved_state.gfx, cmd_buffer);
+ radv_meta_restore(&saved_state, cmd_buffer);
}
void radv_CmdClearDepthStencilImage(
@@ -1290,7 +1159,7 @@ void radv_CmdClearDepthStencilImage(
radv_cmd_clear_image(cmd_buffer, image, imageLayout,
(const VkClearValue *) pDepthStencil,
- rangeCount, pRanges, false);
+ rangeCount, pRanges);
radv_meta_restore(&saved_state, cmd_buffer);
}
@@ -1304,8 +1173,6 @@ void radv_CmdClearAttachments(
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_meta_saved_state saved_state;
- enum radv_cmd_flush_bits pre_flush = 0;
- enum radv_cmd_flush_bits post_flush = 0;
if (!cmd_buffer->state.subpass)
return;
@@ -1317,10 +1184,9 @@ void radv_CmdClearAttachments(
*/
for (uint32_t a = 0; a < attachmentCount; ++a) {
for (uint32_t r = 0; r < rectCount; ++r) {
- emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush);
+ emit_clear(cmd_buffer, &pAttachments[a], &pRects[r]);
}
}
radv_meta_restore(&saved_state, cmd_buffer);
- cmd_buffer->state.flush_bits |= post_flush;
}
diff --git a/lib/mesa/src/amd/vulkan/radv_meta_copy.c b/lib/mesa/src/amd/vulkan/radv_meta_copy.c
index 54dadde78..4c01eb7ac 100644
--- a/lib/mesa/src/amd/vulkan/radv_meta_copy.c
+++ b/lib/mesa/src/amd/vulkan/radv_meta_copy.c
@@ -78,13 +78,13 @@ vk_format_for_size(int bs)
}
static struct radv_meta_blit2d_surf
-blit_surf_for_image_level_layer(struct radv_image *image,
- const VkImageSubresourceLayers *subres)
+blit_surf_for_image_level_layer(struct radv_image* image, VkImageAspectFlags aspectMask,
+ int level, int layer)
{
VkFormat format = image->vk_format;
- if (subres->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
+ if (aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
format = vk_format_depth_only(format);
- else if (subres->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
+ else if (aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
format = vk_format_stencil_only(format);
if (!image->surface.dcc_size)
@@ -93,18 +93,13 @@ blit_surf_for_image_level_layer(struct radv_image *image,
return (struct radv_meta_blit2d_surf) {
.format = format,
.bs = vk_format_get_blocksize(format),
- .level = subres->mipLevel,
- .layer = subres->baseArrayLayer,
+ .level = level,
+ .layer = layer,
.image = image,
- .aspect_mask = subres->aspectMask,
+ .aspect_mask = aspectMask,
};
}
-union meta_saved_state {
- struct radv_meta_saved_state gfx;
- struct radv_meta_saved_compute_state compute;
-};
-
static void
meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
struct radv_buffer* buffer,
@@ -112,18 +107,14 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
uint32_t regionCount,
const VkBufferImageCopy* pRegions)
{
- bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
- union meta_saved_state saved_state;
+ struct radv_meta_saved_state saved_state;
/* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
* VK_SAMPLE_COUNT_1_BIT."
*/
assert(image->samples == 1);
- if (cs)
- radv_meta_begin_bufimage(cmd_buffer, &saved_state.compute);
- else
- radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);
+ radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
for (unsigned r = 0; r < regionCount; r++) {
@@ -159,7 +150,9 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
/* Create blit surfaces */
struct radv_meta_blit2d_surf img_bsurf =
blit_surf_for_image_level_layer(image,
- &pRegions[r].imageSubresource);
+ pRegions[r].imageSubresource.aspectMask,
+ pRegions[r].imageSubresource.mipLevel,
+ pRegions[r].imageSubresource.baseArrayLayer);
struct radv_meta_blit2d_buffer buf_bsurf = {
.bs = img_bsurf.bs,
@@ -169,8 +162,6 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
.pitch = buf_extent_el.width,
};
- if (image->type == VK_IMAGE_TYPE_3D)
- img_bsurf.layer = img_offset_el.z;
/* Loop through each 3D or array slice */
unsigned num_slices_3d = img_extent_el.depth;
unsigned num_slices_array = pRegions[r].imageSubresource.layerCount;
@@ -183,10 +174,7 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
/* Perform Blit */
- if (cs)
- radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect);
- else
- radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);
+ radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);
/* Once we've done the blit, all of the actual information about
* the image is embedded in the command buffer so we can just
@@ -202,10 +190,7 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
slice_array++;
}
}
- if (cs)
- radv_meta_end_bufimage(cmd_buffer, &saved_state.compute);
- else
- radv_meta_restore(&saved_state.gfx, cmd_buffer);
+ radv_meta_restore(&saved_state, cmd_buffer);
}
void radv_CmdCopyBufferToImage(
@@ -268,8 +253,9 @@ meta_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
/* Create blit surfaces */
struct radv_meta_blit2d_surf img_info =
blit_surf_for_image_level_layer(image,
- &pRegions[r].imageSubresource);
-
+ pRegions[r].imageSubresource.aspectMask,
+ pRegions[r].imageSubresource.mipLevel,
+ pRegions[r].imageSubresource.baseArrayLayer);
struct radv_meta_blit2d_buffer buf_info = {
.bs = img_info.bs,
.format = img_info.format,
@@ -278,8 +264,6 @@ meta_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
.pitch = buf_extent_el.width,
};
- if (image->type == VK_IMAGE_TYPE_3D)
- img_info.layer = img_offset_el.z;
/* Loop through each 3D or array slice */
unsigned num_slices_3d = img_extent_el.depth;
unsigned num_slices_array = pRegions[r].imageSubresource.layerCount;
@@ -322,15 +306,19 @@ void radv_CmdCopyImageToBuffer(
regionCount, pRegions);
}
-static void
-meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- struct radv_image *dest_image,
- uint32_t regionCount,
- const VkImageCopy *pRegions)
+void radv_CmdCopyImage(
+ VkCommandBuffer commandBuffer,
+ VkImage srcImage,
+ VkImageLayout srcImageLayout,
+ VkImage destImage,
+ VkImageLayout destImageLayout,
+ uint32_t regionCount,
+ const VkImageCopy* pRegions)
{
- bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
- union meta_saved_state saved_state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_image, src_image, srcImage);
+ RADV_FROM_HANDLE(radv_image, dest_image, destImage);
+ struct radv_meta_saved_state saved_state;
/* From the Vulkan 1.0 spec:
*
@@ -338,10 +326,8 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
* images, but both images must have the same number of samples.
*/
assert(src_image->samples == dest_image->samples);
- if (cs)
- radv_meta_begin_itoi(cmd_buffer, &saved_state.compute);
- else
- radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);
+
+ radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
for (unsigned r = 0; r < regionCount; r++) {
assert(pRegions[r].srcSubresource.aspectMask ==
@@ -350,11 +336,14 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
/* Create blit surfaces */
struct radv_meta_blit2d_surf b_src =
blit_surf_for_image_level_layer(src_image,
- &pRegions[r].srcSubresource);
-
+ pRegions[r].srcSubresource.aspectMask,
+ pRegions[r].srcSubresource.mipLevel,
+ pRegions[r].srcSubresource.baseArrayLayer);
struct radv_meta_blit2d_surf b_dst =
blit_surf_for_image_level_layer(dest_image,
- &pRegions[r].dstSubresource);
+ pRegions[r].dstSubresource.aspectMask,
+ pRegions[r].dstSubresource.mipLevel,
+ pRegions[r].dstSubresource.baseArrayLayer);
/* for DCC */
b_src.format = b_dst.format;
@@ -373,7 +362,7 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
const VkOffset3D src_offset_el =
meta_region_offset_el(src_image, &pRegions[r].srcOffset);
const VkExtent3D img_extent_el =
- meta_region_extent_el(dest_image, &pRegions[r].extent);
+ meta_region_extent_el(src_image, &pRegions[r].extent);
/* Start creating blit rect */
struct radv_meta_blit2d_rect rect = {
@@ -381,9 +370,6 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
.height = img_extent_el.height,
};
- if (dest_image->type == VK_IMAGE_TYPE_3D)
- b_dst.layer = dst_offset_el.z;
-
/* Loop through each 3D or array slice */
unsigned num_slices_3d = img_extent_el.depth;
unsigned num_slices_array = pRegions[r].dstSubresource.layerCount;
@@ -398,10 +384,7 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
rect.src_y = src_offset_el.y;
/* Perform Blit */
- if (cs)
- radv_meta_image_to_image_cs(cmd_buffer, &b_src, &b_dst, 1, &rect);
- else
- radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect);
+ radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect);
b_src.layer++;
b_dst.layer++;
@@ -412,45 +395,5 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
}
}
- if (cs)
- radv_meta_end_itoi(cmd_buffer, &saved_state.compute);
- else
- radv_meta_restore(&saved_state.gfx, cmd_buffer);
-}
-
-void radv_CmdCopyImage(
- VkCommandBuffer commandBuffer,
- VkImage srcImage,
- VkImageLayout srcImageLayout,
- VkImage destImage,
- VkImageLayout destImageLayout,
- uint32_t regionCount,
- const VkImageCopy* pRegions)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_image, src_image, srcImage);
- RADV_FROM_HANDLE(radv_image, dest_image, destImage);
-
- meta_copy_image(cmd_buffer, src_image, dest_image,
- regionCount, pRegions);
-}
-
-void radv_blit_to_prime_linear(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- struct radv_image *linear_image)
-{
- struct VkImageCopy image_copy = { 0 };
-
- image_copy.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
- image_copy.srcSubresource.layerCount = 1;
-
- image_copy.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
- image_copy.dstSubresource.layerCount = 1;
-
- image_copy.extent.width = image->extent.width;
- image_copy.extent.height = image->extent.height;
- image_copy.extent.depth = 1;
-
- meta_copy_image(cmd_buffer, image, linear_image,
- 1, &image_copy);
+ radv_meta_restore(&saved_state, cmd_buffer);
}
diff --git a/lib/mesa/src/amd/vulkan/radv_meta_decompress.c b/lib/mesa/src/amd/vulkan/radv_meta_decompress.c
index 854b88a36..0ba6bd075 100644
--- a/lib/mesa/src/amd/vulkan/radv_meta_decompress.c
+++ b/lib/mesa/src/amd/vulkan/radv_meta_decompress.c
@@ -46,7 +46,7 @@ build_nir_vs(void)
nir_variable *v_position;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "meta_depth_decomp_vs");
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_depth_decomp_vs");
a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
"a_position");
@@ -68,8 +68,8 @@ build_nir_fs(void)
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
- b.shader->info->name = ralloc_asprintf(b.shader,
- "meta_depth_decomp_noop_fs");
+ b.shader->info.name = ralloc_asprintf(b.shader,
+ "meta_depth_decomp_noop_fs");
return b.shader;
}
@@ -178,8 +178,8 @@ create_pipeline(struct radv_device *device,
},
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
+ .viewportCount = 0,
+ .scissorCount = 0,
},
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
@@ -210,14 +210,7 @@ create_pipeline(struct radv_device *device,
.depthBoundsTestEnable = false,
.stencilTestEnable = false,
},
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
+ .pDynamicState = NULL,
.renderPass = device->meta_state.depth_decomp.pass,
.subpass = 0,
};
@@ -324,20 +317,20 @@ emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer,
const struct vertex_attrs vertex_data[3] = {
{
.position = {
- -1.0,
- -1.0,
+ dest_offset->x,
+ dest_offset->y,
},
},
{
.position = {
- -1.0,
- 1.0,
+ dest_offset->x,
+ dest_offset->y + depth_decomp_extent->height,
},
},
{
.position = {
- 1.0,
- -1.0,
+ dest_offset->x + depth_decomp_extent->width,
+ dest_offset->y,
},
},
};
@@ -365,20 +358,6 @@ emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer,
pipeline_h);
}
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = dest_offset->x,
- .y = dest_offset->y,
- .width = depth_decomp_extent->width,
- .height = depth_decomp_extent->height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = *dest_offset,
- .extent = *depth_decomp_extent,
- });
-
radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
}
@@ -397,13 +376,13 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
uint32_t height = radv_minify(image->extent.height,
subresourceRange->baseMipLevel);
- if (!image->surface.htile_size)
+ if (!image->htile.size)
return;
radv_meta_save_pass(&saved_pass_state, cmd_buffer);
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
- for (uint32_t layer = 0; layer < radv_get_layerCount(image, subresourceRange); layer++) {
+ for (uint32_t layer = 0; layer < subresourceRange->layerCount; layer++) {
struct radv_image_view iview;
radv_image_view_init(&iview, cmd_buffer->device,
@@ -471,7 +450,6 @@ void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
VkImageSubresourceRange *subresourceRange)
{
- assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
cmd_buffer->device->meta_state.depth_decomp.decompress_pipeline);
}
@@ -480,7 +458,6 @@ void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
VkImageSubresourceRange *subresourceRange)
{
- assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
cmd_buffer->device->meta_state.depth_decomp.resummarize_pipeline);
}
diff --git a/lib/mesa/src/amd/vulkan/radv_meta_fast_clear.c b/lib/mesa/src/amd/vulkan/radv_meta_fast_clear.c
index 3393bcb25..15c9bbcb1 100644
--- a/lib/mesa/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/lib/mesa/src/amd/vulkan/radv_meta_fast_clear.c
@@ -46,7 +46,7 @@ build_nir_vs(void)
nir_variable *v_position;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "meta_fast_clear_vs");
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_fast_clear_vs");
a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
"a_position");
@@ -68,7 +68,7 @@ build_nir_fs(void)
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
- b.shader->info->name = ralloc_asprintf(b.shader,
+ b.shader->info.name = ralloc_asprintf(b.shader,
"meta_fast_clear_noop_fs");
return b.shader;
@@ -214,8 +214,8 @@ create_pipeline(struct radv_device *device,
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
+ .viewportCount = 0,
+ .scissorCount = 0,
},
.pRasterizationState = &rs_state,
.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
@@ -227,14 +227,7 @@ create_pipeline(struct radv_device *device,
.alphaToOneEnable = false,
},
.pColorBlendState = &blend_state,
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
+ .pDynamicState = NULL,
.renderPass = device->meta_state.fast_clear_flush.pass,
.subpass = 0,
},
@@ -259,8 +252,8 @@ create_pipeline(struct radv_device *device,
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
+ .viewportCount = 0,
+ .scissorCount = 0,
},
.pRasterizationState = &rs_state,
.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
@@ -272,14 +265,7 @@ create_pipeline(struct radv_device *device,
.alphaToOneEnable = false,
},
.pColorBlendState = &blend_state,
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
+ .pDynamicState = NULL,
.renderPass = device->meta_state.fast_clear_flush.pass,
.subpass = 0,
},
@@ -368,24 +354,26 @@ emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer,
const struct vertex_attrs vertex_data[3] = {
{
.position = {
- -1.0,
- -1.0,
+ 0,
+ 0,
},
},
{
.position = {
- -1.0,
- 1.0,
+ 0,
+ resolve_extent->height,
},
},
{
.position = {
- 1.0,
- -1.0,
+ resolve_extent->width,
+ 0,
},
},
};
+ cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB_META);
radv_cmd_buffer_upload_data(cmd_buffer, sizeof(vertex_data), 16, vertex_data, &offset);
struct radv_buffer vertex_buffer = {
.device = device,
@@ -414,77 +402,58 @@ emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer,
pipeline_h);
}
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = 0,
- .y = 0,
- .width = resolve_extent->width,
- .height = resolve_extent->height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = (VkOffset2D) { 0, 0 },
- .extent = (VkExtent2D) { resolve_extent->width, resolve_extent->height },
- });
-
radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META);
+ si_emit_cache_flush(cmd_buffer);
}
/**
*/
void
radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange)
+ struct radv_image *image)
{
struct radv_meta_saved_state saved_state;
struct radv_meta_saved_pass_state saved_pass_state;
VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
- uint32_t layer_count = radv_get_layerCount(image, subresourceRange);
- assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
radv_meta_save_pass(&saved_pass_state, cmd_buffer);
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
- for (uint32_t layer = 0; layer < layer_count; ++layer) {
- struct radv_image_view iview;
-
- radv_image_view_init(&iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ struct radv_image_view iview;
+ radv_image_view_init(&iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = radv_image_to_handle(image),
- .viewType = radv_meta_get_view_type(image),
.format = image->vk_format,
.subresourceRange = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
- .baseArrayLayer = subresourceRange->baseArrayLayer + layer,
+ .baseArrayLayer = 0,
.layerCount = 1,
- },
+ },
},
cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
- VkFramebuffer fb_h;
- radv_CreateFramebuffer(device_h,
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(&iview)
- },
+ VkFramebuffer fb_h;
+ radv_CreateFramebuffer(device_h,
+ &(VkFramebufferCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkImageView[]) {
+ radv_image_view_to_handle(&iview)
+ },
.width = image->extent.width,
.height = image->extent.height,
.layers = 1
- },
- &cmd_buffer->pool->alloc,
- &fb_h);
+ },
+ &cmd_buffer->pool->alloc,
+ &fb_h);
- radv_CmdBeginRenderPass(cmd_buffer_h,
+ radv_CmdBeginRenderPass(cmd_buffer_h,
&(VkRenderPassBeginInfo) {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.renderPass = cmd_buffer->device->meta_state.fast_clear_flush.pass,
@@ -504,15 +473,14 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
},
VK_SUBPASS_CONTENTS_INLINE);
- emit_fast_clear_flush(cmd_buffer,
- &(VkExtent2D) { image->extent.width, image->extent.height },
- image->fmask.size > 0);
- radv_CmdEndRenderPass(cmd_buffer_h);
+ emit_fast_clear_flush(cmd_buffer,
+ &(VkExtent2D) { image->extent.width, image->extent.height },
+ image->fmask.size > 0);
+ radv_CmdEndRenderPass(cmd_buffer_h);
- radv_DestroyFramebuffer(device_h, fb_h,
- &cmd_buffer->pool->alloc);
+ radv_DestroyFramebuffer(device_h, fb_h,
+ &cmd_buffer->pool->alloc);
- }
radv_meta_restore(&saved_state, cmd_buffer);
radv_meta_restore_pass(&saved_pass_state, cmd_buffer);
}
diff --git a/lib/mesa/src/amd/vulkan/radv_meta_resolve.c b/lib/mesa/src/amd/vulkan/radv_meta_resolve.c
index 52f7246f6..da813eb56 100644
--- a/lib/mesa/src/amd/vulkan/radv_meta_resolve.c
+++ b/lib/mesa/src/amd/vulkan/radv_meta_resolve.c
@@ -33,6 +33,7 @@
*/
struct vertex_attrs {
float position[2]; /**< 3DPRIM_RECTLIST */
+ float tex_position[2];
};
/* passthrough vertex shader */
@@ -44,9 +45,11 @@ build_nir_vs(void)
nir_builder b;
nir_variable *a_position;
nir_variable *v_position;
+ nir_variable *a_tex_position;
+ nir_variable *v_tex_position;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "meta_resolve_vs");
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs");
a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
"a_position");
@@ -56,7 +59,16 @@ build_nir_vs(void)
"gl_Position");
v_position->data.location = VARYING_SLOT_POS;
+ a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ "a_tex_position");
+ a_tex_position->data.location = VERT_ATTRIB_GENERIC1;
+
+ v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
+ "v_tex_position");
+ v_tex_position->data.location = VARYING_SLOT_VAR0;
+
nir_copy_var(&b, v_position, a_position);
+ nir_copy_var(&b, v_tex_position, a_tex_position);
return b.shader;
}
@@ -67,16 +79,22 @@ build_nir_fs(void)
{
const struct glsl_type *vec4 = glsl_vec4_type();
nir_builder b;
+ nir_variable *v_tex_position; /* vec4, varying texture coordinate */
nir_variable *f_color; /* vec4, fragment output color */
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
- b.shader->info->name = ralloc_asprintf(b.shader,
- "meta_resolve_fs");
+ b.shader->info.name = ralloc_asprintf(b.shader,
+ "meta_resolve_fs");
+
+ v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ "v_tex_position");
+ v_tex_position->data.location = VARYING_SLOT_VAR0;
f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4,
"f_color");
f_color->data.location = FRAG_RESULT_DATA0;
- nir_store_var(&b, f_color, nir_imm_vec4(&b, 0.0, 0.0, 0.0, 1.0), 0xf);
+
+ nir_copy_var(&b, f_color, v_tex_position);
return b.shader;
}
@@ -95,11 +113,9 @@ create_pass(struct radv_device *device)
attachments[i].samples = 1;
attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+ attachments[i].initialLayout = VK_IMAGE_LAYOUT_GENERAL;
+ attachments[i].finalLayout = VK_IMAGE_LAYOUT_GENERAL;
}
- attachments[0].initialLayout = VK_IMAGE_LAYOUT_GENERAL;
- attachments[0].finalLayout = VK_IMAGE_LAYOUT_GENERAL;
- attachments[1].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- attachments[1].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
result = radv_CreateRenderPass(device_h,
&(VkRenderPassCreateInfo) {
@@ -118,7 +134,7 @@ create_pass(struct radv_device *device)
},
{
.attachment = 1,
- .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
},
},
.pResolveAttachments = NULL,
@@ -182,7 +198,7 @@ create_pipeline(struct radv_device *device,
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
},
},
- .vertexAttributeDescriptionCount = 1,
+ .vertexAttributeDescriptionCount = 2,
.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
{
/* Position */
@@ -191,6 +207,13 @@ create_pipeline(struct radv_device *device,
.format = VK_FORMAT_R32G32_SFLOAT,
.offset = offsetof(struct vertex_attrs, position),
},
+ {
+ /* Texture Coordinate */
+ .location = 1,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = offsetof(struct vertex_attrs, tex_position),
+ },
},
},
.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
@@ -200,8 +223,8 @@ create_pipeline(struct radv_device *device,
},
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
+ .viewportCount = 0,
+ .scissorCount = 0,
},
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
@@ -236,14 +259,7 @@ create_pipeline(struct radv_device *device,
}
},
},
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
+ .pDynamicState = NULL,
.renderPass = device->meta_state.resolve.pass,
.subpass = 0,
},
@@ -317,6 +333,7 @@ cleanup:
static void
emit_resolve(struct radv_cmd_buffer *cmd_buffer,
+ const VkOffset2D *src_offset,
const VkOffset2D *dest_offset,
const VkExtent2D *resolve_extent)
{
@@ -326,20 +343,32 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
const struct vertex_attrs vertex_data[3] = {
{
.position = {
- -1.0,
- -1.0,
+ dest_offset->x,
+ dest_offset->y,
+ },
+ .tex_position = {
+ src_offset->x,
+ src_offset->y,
},
},
{
.position = {
- -1.0,
- 1.0,
+ dest_offset->x,
+ dest_offset->y + resolve_extent->height,
+ },
+ .tex_position = {
+ src_offset->x,
+ src_offset->y + resolve_extent->height,
},
},
{
.position = {
- 1.0,
- -1.0,
+ dest_offset->x + resolve_extent->width,
+ dest_offset->y,
+ },
+ .tex_position = {
+ src_offset->x + resolve_extent->width,
+ src_offset->y,
},
},
};
@@ -369,22 +398,9 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
pipeline_h);
}
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = dest_offset->x,
- .y = dest_offset->y,
- .width = resolve_extent->width,
- .height = resolve_extent->height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = *dest_offset,
- .extent = *resolve_extent,
- });
-
radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+ si_emit_cache_flush(cmd_buffer);
}
void radv_CmdResolveImage(
@@ -424,6 +440,7 @@ void radv_CmdResolveImage(
if (use_compute_resolve) {
+ radv_fast_clear_flush_image_inplace(cmd_buffer, src_image);
radv_meta_resolve_compute_image(cmd_buffer,
src_image,
src_image_layout,
@@ -449,9 +466,6 @@ void radv_CmdResolveImage(
if (src_image->array_size > 1)
radv_finishme("vkCmdResolveImage: multisample array images");
- if (dest_image->surface.dcc_size) {
- radv_initialize_dcc(cmd_buffer, dest_image, 0xffffffff);
- }
for (uint32_t r = 0; r < region_count; ++r) {
const VkImageResolve *region = &regions[r];
@@ -491,6 +505,8 @@ void radv_CmdResolveImage(
*/
const struct VkExtent3D extent =
radv_sanitize_image_extent(src_image->type, region->extent);
+ const struct VkOffset3D srcOffset =
+ radv_sanitize_image_offset(src_image->type, region->srcOffset);
const struct VkOffset3D dstOffset =
radv_sanitize_image_offset(dest_image->type, region->dstOffset);
@@ -572,6 +588,10 @@ void radv_CmdResolveImage(
emit_resolve(cmd_buffer,
&(VkOffset2D) {
+ .x = srcOffset.x,
+ .y = srcOffset.y,
+ },
+ &(VkOffset2D) {
.x = dstOffset.x,
.y = dstOffset.y,
},
@@ -643,6 +663,7 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
*/
emit_resolve(cmd_buffer,
&(VkOffset2D) { 0, 0 },
+ &(VkOffset2D) { 0, 0 },
&(VkExtent2D) { fb->width, fb->height });
}
diff --git a/lib/mesa/src/amd/vulkan/radv_meta_resolve_cs.c b/lib/mesa/src/amd/vulkan/radv_meta_resolve_cs.c
index ffa07cac5..c6525b6f3 100644
--- a/lib/mesa/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/lib/mesa/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -47,10 +47,10 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int sampl
GLSL_TYPE_FLOAT);
snprintf(name, 64, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : "float");
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, name);
- b.shader->info->cs.local_size[0] = 16;
- b.shader->info->cs.local_size[1] = 16;
- b.shader->info->cs.local_size[2] = 1;
+ b.shader->info.name = ralloc_strdup(b.shader, name);
+ b.shader->info.cs.local_size[0] = 16;
+ b.shader->info.cs.local_size[1] = 16;
+ b.shader->info.cs.local_size[2] = 1;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
sampler_type, "s_tex");
@@ -64,9 +64,9 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int sampl
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info->cs.local_size[0],
- b.shader->info->cs.local_size[1],
- b.shader->info->cs.local_size[2], 0);
+ b.shader->info.cs.local_size[0],
+ b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
@@ -82,7 +82,7 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int sampl
nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
nir_builder_instr_insert(&b, &dst_offset->instr);
- nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
+ nir_ssa_def *img_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
/* do a txf_ms on each sample */
nir_ssa_def *tmp;
@@ -179,7 +179,6 @@ create_layout(struct radv_device *device)
*/
VkDescriptorSetLayoutCreateInfo ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 2,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
@@ -327,21 +326,6 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_saved_compute_state saved_state;
const uint32_t samples = src_image->samples;
const uint32_t samples_log2 = ffs(samples) - 1;
-
- for (uint32_t r = 0; r < region_count; ++r) {
- const VkImageResolve *region = &regions[r];
- const uint32_t src_base_layer =
- radv_meta_get_iview_layer(src_image, &region->srcSubresource,
- &region->srcOffset);
- VkImageSubresourceRange range;
- range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
- range.baseMipLevel = region->srcSubresource.mipLevel;
- range.levelCount = 1;
- range.baseArrayLayer = src_base_layer;
- range.layerCount = region->srcSubresource.layerCount;
- radv_fast_clear_flush_image_inplace(cmd_buffer, src_image, &range);
- }
-
radv_meta_save_compute(&saved_state, cmd_buffer, 16);
for (uint32_t r = 0; r < region_count; ++r) {
@@ -370,6 +354,7 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
++layer) {
struct radv_image_view src_iview;
+ VkDescriptorSet set;
radv_image_view_init(&src_iview, cmd_buffer->device,
&(VkImageViewCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@@ -404,41 +389,49 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer, VK_IMAGE_USAGE_STORAGE_BIT);
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.resolve_compute.p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&dest_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
+ radv_temp_descriptor_set_create(device, cmd_buffer,
+ device->meta_state.resolve_compute.ds_layout,
+ &set);
+
+ radv_UpdateDescriptorSets(radv_device_to_handle(device),
+ 2, /* writeCount */
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = set,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]) {
+ {
+ .sampler = NULL,
+ .imageView = radv_image_view_to_handle(&src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = set,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]) {
+ {
+ .sampler = NULL,
+ .imageView = radv_image_view_to_handle(&dest_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }
+ }
+ }, 0, NULL);
+
+ radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.resolve_compute.p_layout, 0, 1,
+ &set, 0, NULL);
VkPipeline pipeline;
if (vk_format_is_int(src_image->vk_format))
@@ -461,6 +454,7 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
push_constants);
radv_unaligned_dispatch(cmd_buffer, extent.width, extent.height, 1);
+ radv_temp_descriptor_set_destroy(cmd_buffer->device, set);
}
}
radv_meta_restore_compute(&saved_state, cmd_buffer, 16);
diff --git a/lib/mesa/src/amd/vulkan/radv_pipeline.c b/lib/mesa/src/amd/vulkan/radv_pipeline.c
index e0c67ce5e..7c10b78e7 100644
--- a/lib/mesa/src/amd/vulkan/radv_pipeline.c
+++ b/lib/mesa/src/amd/vulkan/radv_pipeline.c
@@ -41,7 +41,6 @@
#include "ac_nir_to_llvm.h"
#include "vk_format.h"
#include "util/debug.h"
-
void radv_shader_variant_destroy(struct radv_device *device,
struct radv_shader_variant *variant);
@@ -105,22 +104,6 @@ void radv_DestroyShaderModule(
vk_free2(&device->alloc, pAllocator, module);
}
-
-static void
-radv_pipeline_destroy(struct radv_device *device,
- struct radv_pipeline *pipeline,
- const VkAllocationCallbacks* allocator)
-{
- for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i)
- if (pipeline->shaders[i])
- radv_shader_variant_destroy(device, pipeline->shaders[i]);
-
- if (pipeline->gs_copy_shader)
- radv_shader_variant_destroy(device, pipeline->gs_copy_shader);
-
- vk_free2(&device->alloc, allocator, pipeline);
-}
-
void radv_DestroyPipeline(
VkDevice _device,
VkPipeline _pipeline,
@@ -132,7 +115,11 @@ void radv_DestroyPipeline(
if (!_pipeline)
return;
- radv_pipeline_destroy(device, pipeline, pAllocator);
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i)
+ if (pipeline->shaders[i])
+ radv_shader_variant_destroy(device, pipeline->shaders[i]);
+
+ vk_free2(&device->alloc, pAllocator, pipeline);
}
@@ -145,7 +132,6 @@ radv_optimize_nir(struct nir_shader *shader)
progress = false;
NIR_PASS_V(shader, nir_lower_vars_to_ssa);
- NIR_PASS_V(shader, nir_lower_64bit_pack);
NIR_PASS_V(shader, nir_lower_alu_to_scalar);
NIR_PASS_V(shader, nir_lower_phis_to_scalar);
@@ -202,35 +188,24 @@ radv_shader_compile_to_nir(struct radv_device *device,
assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
spec_entries[i].id = spec_info->pMapEntries[i].constantID;
- if (spec_info->dataSize == 8)
- spec_entries[i].data64 = *(const uint64_t *)data;
- else
- spec_entries[i].data32 = *(const uint32_t *)data;
+ spec_entries[i].data = *(const uint32_t *)data;
}
}
- const struct nir_spirv_supported_extensions supported_ext = {
- .draw_parameters = true,
- .float64 = true,
- .image_read_without_format = true,
- .image_write_without_format = true,
- .tessellation = true,
- };
+
entry_point = spirv_to_nir(spirv, module->size / 4,
spec_entries, num_spec_entries,
- stage, entrypoint_name, &supported_ext, &nir_options);
+ stage, entrypoint_name, &nir_options);
nir = entry_point->shader;
assert(nir->stage == stage);
nir_validate_shader(nir);
free(spec_entries);
- /* We have to lower away local constant initializers right before we
- * inline functions. That way they get properly initialized at the top
- * of the function and not at the top of its caller.
- */
- NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local);
- NIR_PASS_V(nir, nir_lower_returns);
- NIR_PASS_V(nir, nir_inline_functions);
+ nir_lower_returns(nir);
+ nir_validate_shader(nir);
+
+ nir_inline_functions(nir);
+ nir_validate_shader(nir);
/* Pick off the single entrypoint that we want */
foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
@@ -240,24 +215,26 @@ radv_shader_compile_to_nir(struct radv_device *device,
assert(exec_list_length(&nir->functions) == 1);
entry_point->name = ralloc_strdup(entry_point, "main");
- NIR_PASS_V(nir, nir_remove_dead_variables,
- nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
+ nir_remove_dead_variables(nir, nir_var_shader_in);
+ nir_remove_dead_variables(nir, nir_var_shader_out);
+ nir_remove_dead_variables(nir, nir_var_system_value);
+ nir_validate_shader(nir);
- /* Now that we've deleted all but the main function, we can go ahead and
- * lower the rest of the constant initializers.
- */
- NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
- NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
+ nir_lower_system_values(nir);
+ nir_validate_shader(nir);
}
/* Vulkan uses the separate-shader linking model */
- nir->info->separate_shader = true;
+ nir->info.separate_shader = true;
+
+ // nir = brw_preprocess_nir(compiler, nir);
nir_shader_gather_info(nir, entry_point->impl);
nir_variable_mode indirect_mask = 0;
+ // if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput)
indirect_mask |= nir_var_shader_in;
+ // if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp)
indirect_mask |= nir_var_local;
nir_lower_indirect_derefs(nir, indirect_mask);
@@ -280,84 +257,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
return nir;
}
-static const char *radv_get_shader_name(struct radv_shader_variant *var,
- gl_shader_stage stage)
-{
- switch (stage) {
- case MESA_SHADER_VERTEX: return var->info.vs.as_ls ? "Vertex Shader as LS" : var->info.vs.as_es ? "Vertex Shader as ES" : "Vertex Shader as VS";
- case MESA_SHADER_GEOMETRY: return "Geometry Shader";
- case MESA_SHADER_FRAGMENT: return "Pixel Shader";
- case MESA_SHADER_COMPUTE: return "Compute Shader";
- case MESA_SHADER_TESS_CTRL: return "Tessellation Control Shader";
- case MESA_SHADER_TESS_EVAL: return var->info.tes.as_es ? "Tessellation Evaluation Shader as ES" : "Tessellation Evaluation Shader as VS";
- default:
- return "Unknown shader";
- };
-
-}
-static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pipeline *pipeline)
-{
- unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
- struct radv_shader_variant *var;
- struct ac_shader_config *conf;
- int i;
- FILE *file = stderr;
- unsigned max_simd_waves = 10;
- unsigned lds_per_wave = 0;
-
- for (i = 0; i < MESA_SHADER_STAGES; i++) {
- if (!pipeline->shaders[i])
- continue;
- var = pipeline->shaders[i];
-
- conf = &var->config;
-
- if (i == MESA_SHADER_FRAGMENT) {
- lds_per_wave = conf->lds_size * lds_increment +
- align(var->info.fs.num_interp * 48, lds_increment);
- }
-
- if (conf->num_sgprs) {
- if (device->physical_device->rad_info.chip_class >= VI)
- max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
- else
- max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
- }
-
- if (conf->num_vgprs)
- max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
-
- /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
- * that PS can use.
- */
- if (lds_per_wave)
- max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
-
- fprintf(file, "\n%s:\n",
- radv_get_shader_name(var, i));
- if (i == MESA_SHADER_FRAGMENT) {
- fprintf(file, "*** SHADER CONFIG ***\n"
- "SPI_PS_INPUT_ADDR = 0x%04x\n"
- "SPI_PS_INPUT_ENA = 0x%04x\n",
- conf->spi_ps_input_addr, conf->spi_ps_input_ena);
- }
- fprintf(file, "*** SHADER STATS ***\n"
- "SGPRS: %d\n"
- "VGPRS: %d\n"
- "Spilled SGPRs: %d\n"
- "Spilled VGPRs: %d\n"
- "Code Size: %d bytes\n"
- "LDS: %d blocks\n"
- "Scratch: %d bytes per wave\n"
- "Max Waves: %d\n"
- "********************\n\n\n",
- conf->num_sgprs, conf->num_vgprs,
- conf->spilled_sgprs, conf->spilled_vgprs, var->code_size,
- conf->lds_size, conf->scratch_bytes_per_wave,
- max_simd_waves);
- }
-}
-
void radv_shader_variant_destroy(struct radv_device *device,
struct radv_shader_variant *variant)
{
@@ -368,36 +267,54 @@ void radv_shader_variant_destroy(struct radv_device *device,
free(variant);
}
-static void radv_fill_shader_variant(struct radv_device *device,
- struct radv_shader_variant *variant,
- struct ac_shader_binary *binary,
- gl_shader_stage stage)
+static
+struct radv_shader_variant *radv_shader_variant_create(struct radv_device *device,
+ struct nir_shader *shader,
+ struct radv_pipeline_layout *layout,
+ const union ac_shader_variant_key *key,
+ void** code_out,
+ unsigned *code_size_out,
+ bool dump)
{
- bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0;
- unsigned vgpr_comp_cnt = 0;
+ struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
+ enum radeon_family chip_family = device->instance->physicalDevice.rad_info.family;
+ LLVMTargetMachineRef tm;
+ if (!variant)
+ return NULL;
+
+ struct ac_nir_compiler_options options = {0};
+ options.layout = layout;
+ if (key)
+ options.key = *key;
- if (scratch_enabled && !device->llvm_supports_spill)
- radv_finishme("shader scratch support only available with LLVM 4.0");
+ struct ac_shader_binary binary;
- variant->code_size = binary->code_size;
- variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) |
- S_00B12C_SCRATCH_EN(scratch_enabled);
+ options.unsafe_math = env_var_as_boolean("RADV_UNSAFE_MATH", false);
+ options.family = chip_family;
+ options.chip_class = device->instance->physicalDevice.rad_info.chip_class;
+ tm = ac_create_target_machine(chip_family);
+ ac_compile_nir_shader(tm, &binary, &variant->config,
+ &variant->info, shader, &options, dump);
+ LLVMDisposeTargetMachine(tm);
- switch (stage) {
- case MESA_SHADER_TESS_EVAL:
- vgpr_comp_cnt = 3;
- /* fallthrough */
- case MESA_SHADER_TESS_CTRL:
- variant->rsrc2 |= S_00B42C_OC_LDS_EN(1);
- break;
+ bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0;
+ unsigned vgpr_comp_cnt = 0;
+
+ if (scratch_enabled)
+ radv_finishme("shader scratch space");
+ switch (shader->stage) {
case MESA_SHADER_VERTEX:
- case MESA_SHADER_GEOMETRY:
+ variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) |
+ S_00B12C_SCRATCH_EN(scratch_enabled);
vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt;
break;
case MESA_SHADER_FRAGMENT:
+ variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) |
+ S_00B12C_SCRATCH_EN(scratch_enabled);
break;
case MESA_SHADER_COMPUTE:
- variant->rsrc2 |=
+ variant->rsrc2 = S_00B84C_USER_SGPR(variant->info.num_user_sgprs) |
+ S_00B84C_SCRATCH_EN(scratch_enabled) |
S_00B84C_TGID_X_EN(1) | S_00B84C_TGID_Y_EN(1) |
S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) |
S_00B84C_TG_SIZE_EN(1) |
@@ -414,48 +331,13 @@ static void radv_fill_shader_variant(struct radv_device *device,
S_00B848_DX10_CLAMP(1) |
S_00B848_FLOAT_MODE(variant->config.float_mode);
- variant->bo = device->ws->buffer_create(device->ws, binary->code_size, 256,
- RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
+ variant->bo = device->ws->buffer_create(device->ws, binary.code_size, 256,
+ RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
void *ptr = device->ws->buffer_map(variant->bo);
- memcpy(ptr, binary->code, binary->code_size);
+ memcpy(ptr, binary.code, binary.code_size);
device->ws->buffer_unmap(variant->bo);
-
-}
-
-static struct radv_shader_variant *radv_shader_variant_create(struct radv_device *device,
- struct nir_shader *shader,
- struct radv_pipeline_layout *layout,
- const union ac_shader_variant_key *key,
- void** code_out,
- unsigned *code_size_out,
- bool dump)
-{
- struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
- enum radeon_family chip_family = device->physical_device->rad_info.family;
- LLVMTargetMachineRef tm;
- if (!variant)
- return NULL;
-
- struct ac_nir_compiler_options options = {0};
- options.layout = layout;
- if (key)
- options.key = *key;
-
- struct ac_shader_binary binary;
-
- options.unsafe_math = !!(device->debug_flags & RADV_DEBUG_UNSAFE_MATH);
- options.family = chip_family;
- options.chip_class = device->physical_device->rad_info.chip_class;
- options.supports_spill = device->llvm_supports_spill;
- tm = ac_create_target_machine(chip_family, options.supports_spill);
- ac_compile_nir_shader(tm, &binary, &variant->config,
- &variant->info, shader, &options, dump);
- LLVMDisposeTargetMachine(tm);
-
- radv_fill_shader_variant(device, variant, &binary, shader->stage);
-
if (code_out) {
*code_out = binary.code;
*code_size_out = binary.code_size;
@@ -470,43 +352,6 @@ static struct radv_shader_variant *radv_shader_variant_create(struct radv_device
return variant;
}
-static struct radv_shader_variant *
-radv_pipeline_create_gs_copy_shader(struct radv_pipeline *pipeline,
- struct nir_shader *nir,
- void** code_out,
- unsigned *code_size_out,
- bool dump_shader)
-{
- struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
- enum radeon_family chip_family = pipeline->device->physical_device->rad_info.family;
- LLVMTargetMachineRef tm;
- if (!variant)
- return NULL;
-
- struct ac_nir_compiler_options options = {0};
- struct ac_shader_binary binary;
- options.family = chip_family;
- options.chip_class = pipeline->device->physical_device->rad_info.chip_class;
- options.supports_spill = pipeline->device->llvm_supports_spill;
- tm = ac_create_target_machine(chip_family, options.supports_spill);
- ac_create_gs_copy_shader(tm, nir, &binary, &variant->config, &variant->info, &options, dump_shader);
- LLVMDisposeTargetMachine(tm);
-
- radv_fill_shader_variant(pipeline->device, variant, &binary, MESA_SHADER_VERTEX);
-
- if (code_out) {
- *code_out = binary.code;
- *code_size_out = binary.code_size;
- } else
- free(binary.code);
- free(binary.config);
- free(binary.rodata);
- free(binary.global_symbol_offsets);
- free(binary.relocs);
- free(binary.disasm_string);
- variant->ref_count = 1;
- return variant;
-}
static struct radv_shader_variant *
radv_pipeline_compile(struct radv_pipeline *pipeline,
@@ -516,41 +361,29 @@ radv_pipeline_compile(struct radv_pipeline *pipeline,
gl_shader_stage stage,
const VkSpecializationInfo *spec_info,
struct radv_pipeline_layout *layout,
- const union ac_shader_variant_key *key)
+ const union ac_shader_variant_key *key,
+ bool dump)
{
unsigned char sha1[20];
- unsigned char gs_copy_sha1[20];
struct radv_shader_variant *variant;
nir_shader *nir;
void *code = NULL;
unsigned code_size = 0;
- bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS);
if (module->nir)
- _mesa_sha1_compute(module->nir->info->name,
- strlen(module->nir->info->name),
+ _mesa_sha1_compute(module->nir->info.name,
+ strlen(module->nir->info.name),
module->sha1);
- radv_hash_shader(sha1, module, entrypoint, spec_info, layout, key, 0);
- if (stage == MESA_SHADER_GEOMETRY)
- radv_hash_shader(gs_copy_sha1, module, entrypoint, spec_info,
- layout, key, 1);
-
- variant = radv_create_shader_variant_from_pipeline_cache(pipeline->device,
- cache,
- sha1);
-
- if (stage == MESA_SHADER_GEOMETRY) {
- pipeline->gs_copy_shader =
- radv_create_shader_variant_from_pipeline_cache(
- pipeline->device,
- cache,
- gs_copy_sha1);
- }
+ radv_hash_shader(sha1, module, entrypoint, spec_info, layout, key);
- if (variant &&
- (stage != MESA_SHADER_GEOMETRY || pipeline->gs_copy_shader))
- return variant;
+ if (cache) {
+ variant = radv_create_shader_variant_from_pipeline_cache(pipeline->device,
+ cache,
+ sha1);
+ if (variant)
+ return variant;
+ }
nir = radv_shader_compile_to_nir(pipeline->device,
module, entrypoint, stage,
@@ -558,31 +391,12 @@ radv_pipeline_compile(struct radv_pipeline *pipeline,
if (nir == NULL)
return NULL;
- if (!variant) {
- variant = radv_shader_variant_create(pipeline->device, nir,
- layout, key, &code,
- &code_size, dump);
- }
-
- if (stage == MESA_SHADER_GEOMETRY && !pipeline->gs_copy_shader) {
- void *gs_copy_code = NULL;
- unsigned gs_copy_code_size = 0;
- pipeline->gs_copy_shader = radv_pipeline_create_gs_copy_shader(
- pipeline, nir, &gs_copy_code, &gs_copy_code_size, dump);
-
- if (pipeline->gs_copy_shader) {
- pipeline->gs_copy_shader =
- radv_pipeline_cache_insert_shader(cache,
- gs_copy_sha1,
- pipeline->gs_copy_shader,
- gs_copy_code,
- gs_copy_code_size);
- }
- }
+ variant = radv_shader_variant_create(pipeline->device, nir, layout, key,
+ &code, &code_size, dump);
if (!module->nir)
- ralloc_free(nir);
+ ralloc_free(nir);
- if (variant)
+ if (variant && cache)
variant = radv_pipeline_cache_insert_shader(cache, sha1, variant,
code, code_size);
@@ -591,173 +405,6 @@ radv_pipeline_compile(struct radv_pipeline *pipeline,
return variant;
}
-static union ac_shader_variant_key
-radv_compute_tes_key(bool as_es)
-{
- union ac_shader_variant_key key;
- memset(&key, 0, sizeof(key));
- key.tes.as_es = as_es;
- return key;
-}
-
-static union ac_shader_variant_key
-radv_compute_tcs_key(unsigned primitive_mode, unsigned input_vertices)
-{
- union ac_shader_variant_key key;
- memset(&key, 0, sizeof(key));
- key.tcs.primitive_mode = primitive_mode;
- key.tcs.input_vertices = input_vertices;
- return key;
-}
-
-static void
-radv_tess_pipeline_compile(struct radv_pipeline *pipeline,
- struct radv_pipeline_cache *cache,
- struct radv_shader_module *tcs_module,
- struct radv_shader_module *tes_module,
- const char *tcs_entrypoint,
- const char *tes_entrypoint,
- const VkSpecializationInfo *tcs_spec_info,
- const VkSpecializationInfo *tes_spec_info,
- struct radv_pipeline_layout *layout,
- unsigned input_vertices)
-{
- unsigned char tcs_sha1[20], tes_sha1[20];
- struct radv_shader_variant *tes_variant = NULL, *tcs_variant = NULL;
- nir_shader *tes_nir, *tcs_nir;
- void *tes_code = NULL, *tcs_code = NULL;
- unsigned tes_code_size = 0, tcs_code_size = 0;
- union ac_shader_variant_key tes_key = radv_compute_tes_key(radv_pipeline_has_gs(pipeline));
- union ac_shader_variant_key tcs_key;
- bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS);
-
- if (tes_module->nir)
- _mesa_sha1_compute(tes_module->nir->info->name,
- strlen(tes_module->nir->info->name),
- tes_module->sha1);
- radv_hash_shader(tes_sha1, tes_module, tes_entrypoint, tes_spec_info, layout, &tes_key, 0);
-
- tes_variant = radv_create_shader_variant_from_pipeline_cache(pipeline->device,
- cache,
- tes_sha1);
-
- if (tes_variant) {
- tcs_key = radv_compute_tcs_key(tes_variant->info.tes.primitive_mode, input_vertices);
-
- if (tcs_module->nir)
- _mesa_sha1_compute(tcs_module->nir->info->name,
- strlen(tcs_module->nir->info->name),
- tcs_module->sha1);
-
- radv_hash_shader(tcs_sha1, tcs_module, tcs_entrypoint, tcs_spec_info, layout, &tcs_key, 0);
-
- tcs_variant = radv_create_shader_variant_from_pipeline_cache(pipeline->device,
- cache,
- tcs_sha1);
- }
-
- if (tcs_variant && tes_variant) {
- pipeline->shaders[MESA_SHADER_TESS_CTRL] = tcs_variant;
- pipeline->shaders[MESA_SHADER_TESS_EVAL] = tes_variant;
- return;
- }
-
- tes_nir = radv_shader_compile_to_nir(pipeline->device,
- tes_module, tes_entrypoint, MESA_SHADER_TESS_EVAL,
- tes_spec_info, dump);
- if (tes_nir == NULL)
- return;
-
- tcs_nir = radv_shader_compile_to_nir(pipeline->device,
- tcs_module, tcs_entrypoint, MESA_SHADER_TESS_CTRL,
- tcs_spec_info, dump);
- if (tcs_nir == NULL)
- return;
-
- nir_lower_tes_patch_vertices(tes_nir,
- tcs_nir->info->tess.tcs_vertices_out);
-
- tes_variant = radv_shader_variant_create(pipeline->device, tes_nir,
- layout, &tes_key, &tes_code,
- &tes_code_size, dump);
-
- tcs_key = radv_compute_tcs_key(tes_nir->info->tess.primitive_mode, input_vertices);
- if (tcs_module->nir)
- _mesa_sha1_compute(tcs_module->nir->info->name,
- strlen(tcs_module->nir->info->name),
- tcs_module->sha1);
-
- radv_hash_shader(tcs_sha1, tcs_module, tcs_entrypoint, tcs_spec_info, layout, &tcs_key, 0);
-
- tcs_variant = radv_shader_variant_create(pipeline->device, tcs_nir,
- layout, &tcs_key, &tcs_code,
- &tcs_code_size, dump);
-
- if (!tes_module->nir)
- ralloc_free(tes_nir);
-
- if (!tcs_module->nir)
- ralloc_free(tcs_nir);
-
- if (tes_variant)
- tes_variant = radv_pipeline_cache_insert_shader(cache, tes_sha1, tes_variant,
- tes_code, tes_code_size);
-
- if (tcs_variant)
- tcs_variant = radv_pipeline_cache_insert_shader(cache, tcs_sha1, tcs_variant,
- tcs_code, tcs_code_size);
-
- if (tes_code)
- free(tes_code);
- if (tcs_code)
- free(tcs_code);
- pipeline->shaders[MESA_SHADER_TESS_CTRL] = tcs_variant;
- pipeline->shaders[MESA_SHADER_TESS_EVAL] = tes_variant;
- return;
-}
-
-static VkResult
-radv_pipeline_scratch_init(struct radv_device *device,
- struct radv_pipeline *pipeline)
-{
- unsigned scratch_bytes_per_wave = 0;
- unsigned max_waves = 0;
- unsigned min_waves = 1;
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (pipeline->shaders[i]) {
- unsigned max_stage_waves = device->scratch_waves;
-
- scratch_bytes_per_wave = MAX2(scratch_bytes_per_wave,
- pipeline->shaders[i]->config.scratch_bytes_per_wave);
-
- max_stage_waves = MIN2(max_stage_waves,
- 4 * device->physical_device->rad_info.num_good_compute_units *
- (256 / pipeline->shaders[i]->config.num_vgprs));
- max_waves = MAX2(max_waves, max_stage_waves);
- }
- }
-
- if (pipeline->shaders[MESA_SHADER_COMPUTE]) {
- unsigned group_size = pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[0] *
- pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[1] *
- pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[2];
- min_waves = MAX2(min_waves, round_up_u32(group_size, 64));
- }
-
- if (scratch_bytes_per_wave)
- max_waves = MIN2(max_waves, 0xffffffffu / scratch_bytes_per_wave);
-
- if (scratch_bytes_per_wave && max_waves < min_waves) {
- /* Not really true at this moment, but will be true on first
- * execution. Avoid having hanging shaders. */
- return VK_ERROR_OUT_OF_DEVICE_MEMORY;
- }
- pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave;
- pipeline->max_waves = max_waves;
- return VK_SUCCESS;
-}
-
static uint32_t si_translate_blend_function(VkBlendOp op)
{
switch (op) {
@@ -1021,6 +668,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
if (blend_mrt0_is_dual_src)
col_format |= (col_format & 0xf) << 4;
+ if (!col_format)
+ col_format |= V_028714_SPI_SHADER_32_R;
blend->spi_shader_col_format = col_format;
}
@@ -1259,7 +908,7 @@ radv_pipeline_init_raster_state(struct radv_pipeline *pipeline,
S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
S_0286D4_PNT_SPRITE_TOP_1(0); // vulkan is top to bottom - 1.0 at bottom
-
+ raster->pa_cl_vs_out_cntl = S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1);
raster->pa_cl_clip_cntl = S_028810_PS_UCP_MODE(3) |
S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions.
S_028810_ZCLIP_NEAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) |
@@ -1292,19 +941,11 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState;
struct radv_blend_state *blend = &pipeline->graphics.blend;
struct radv_multisample_state *ms = &pipeline->graphics.ms;
- unsigned num_tile_pipes = pipeline->device->physical_device->rad_info.num_tile_pipes;
+ unsigned num_tile_pipes = pipeline->device->instance->physicalDevice.rad_info.num_tile_pipes;
int ps_iter_samples = 1;
uint32_t mask = 0xffff;
- if (vkms)
- ms->num_samples = vkms->rasterizationSamples;
- else
- ms->num_samples = 1;
-
- if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.force_persample) {
- ps_iter_samples = ms->num_samples;
- }
-
+ ms->num_samples = vkms->rasterizationSamples;
ms->pa_sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
ms->pa_sc_aa_config = 0;
ms->db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
@@ -1320,8 +961,8 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1);
- if (ms->num_samples > 1) {
- unsigned log_samples = util_logbase2(ms->num_samples);
+ if (vkms->rasterizationSamples > 1) {
+ unsigned log_samples = util_logbase2(vkms->rasterizationSamples);
unsigned log_ps_iter_samples = util_logbase2(util_next_power_of_two(ps_iter_samples));
ms->pa_sc_mode_cntl_0 = S_028A48_MSAA_ENABLE(1);
ms->pa_sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1); /* CM_R_028BDC_PA_SC_LINE_CNTL */
@@ -1335,40 +976,17 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
ms->pa_sc_mode_cntl_1 |= EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
}
- if (vkms) {
- if (vkms->alphaToCoverageEnable)
- blend->db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
+ if (vkms->alphaToCoverageEnable)
+ blend->db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
- if (vkms->pSampleMask)
- mask = vkms->pSampleMask[0] & 0xffff;
+ if (vkms->pSampleMask) {
+ mask = vkms->pSampleMask[0] & 0xffff;
}
ms->pa_sc_aa_mask[0] = mask | (mask << 16);
ms->pa_sc_aa_mask[1] = mask | (mask << 16);
}
-static bool
-radv_prim_can_use_guardband(enum VkPrimitiveTopology topology)
-{
- switch (topology) {
- case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
- return false;
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
- return true;
- default:
- unreachable("unhandled primitive type");
- }
-}
-
static uint32_t
si_translate_prim(enum VkPrimitiveTopology topology)
{
@@ -1402,29 +1020,6 @@ si_translate_prim(enum VkPrimitiveTopology topology)
}
static uint32_t
-si_conv_gl_prim_to_gs_out(unsigned gl_prim)
-{
- switch (gl_prim) {
- case 0: /* GL_POINTS */
- return V_028A6C_OUTPRIM_TYPE_POINTLIST;
- case 1: /* GL_LINES */
- case 3: /* GL_LINE_STRIP */
- case 0xA: /* GL_LINE_STRIP_ADJACENCY_ARB */
- case 0x8E7A: /* GL_ISOLINES */
- return V_028A6C_OUTPRIM_TYPE_LINESTRIP;
-
- case 4: /* GL_TRIANGLES */
- case 0xc: /* GL_TRIANGLES_ADJACENCY_ARB */
- case 5: /* GL_TRIANGLE_STRIP */
- case 7: /* GL_QUADS */
- return V_028A6C_OUTPRIM_TYPE_TRISTRIP;
- default:
- assert(0);
- return 0;
- }
-}
-
-static uint32_t
si_conv_prim_to_gs_out(enum VkPrimitiveTopology topology)
{
switch (topology) {
@@ -1592,7 +1187,7 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
}
static union ac_shader_variant_key
-radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es, bool as_ls)
+radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
union ac_shader_variant_key key;
const VkPipelineVertexInputStateCreateInfo *input_state =
@@ -1600,8 +1195,6 @@ radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es,
memset(&key, 0, sizeof(key));
key.vs.instance_rate_inputs = 0;
- key.vs.as_es = as_es;
- key.vs.as_ls = as_ls;
for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) {
unsigned binding;
@@ -1612,334 +1205,6 @@ radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es,
return key;
}
-static void
-calculate_gs_ring_sizes(struct radv_pipeline *pipeline)
-{
- struct radv_device *device = pipeline->device;
- unsigned num_se = device->physical_device->rad_info.max_se;
- unsigned wave_size = 64;
- unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
- unsigned gs_vertex_reuse = 16 * num_se; /* GS_VERTEX_REUSE register (per SE) */
- unsigned alignment = 256 * num_se;
- /* The maximum size is 63.999 MB per SE. */
- unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
- struct ac_shader_variant_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
- struct ac_es_output_info *es_info = radv_pipeline_has_tess(pipeline) ?
- &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.es_info :
- &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.es_info;
-
- /* Calculate the minimum size. */
- unsigned min_esgs_ring_size = align(es_info->esgs_itemsize * gs_vertex_reuse *
- wave_size, alignment);
- /* These are recommended sizes, not minimum sizes. */
- unsigned esgs_ring_size = max_gs_waves * 2 * wave_size *
- es_info->esgs_itemsize * gs_info->gs.vertices_in;
- unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size *
- gs_info->gs.max_gsvs_emit_size * 1; // no streams in VK (gs->max_gs_stream + 1);
-
- min_esgs_ring_size = align(min_esgs_ring_size, alignment);
- esgs_ring_size = align(esgs_ring_size, alignment);
- gsvs_ring_size = align(gsvs_ring_size, alignment);
-
- pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
- pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
-}
-
-static void si_multiwave_lds_size_workaround(struct radv_device *device,
- unsigned *lds_size)
-{
- /* SPI barrier management bug:
- * Make sure we have at least 4k of LDS in use to avoid the bug.
- * It applies to workgroup sizes of more than one wavefront.
- */
- if (device->physical_device->rad_info.family == CHIP_BONAIRE ||
- device->physical_device->rad_info.family == CHIP_KABINI ||
- device->physical_device->rad_info.family == CHIP_MULLINS)
- *lds_size = MAX2(*lds_size, 8);
-}
-
-static void
-calculate_tess_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
-{
- unsigned num_tcs_input_cp = pCreateInfo->pTessellationState->patchControlPoints;
- unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
- unsigned num_tcs_patch_outputs;
- unsigned input_vertex_size, output_vertex_size, pervertex_output_patch_size;
- unsigned input_patch_size, output_patch_size, output_patch0_offset;
- unsigned lds_size, hardware_lds_size;
- unsigned perpatch_output_offset;
- unsigned num_patches;
- struct radv_tessellation_state *tess = &pipeline->graphics.tess;
-
- /* This calculates how shader inputs and outputs among VS, TCS, and TES
- * are laid out in LDS. */
- num_tcs_inputs = util_last_bit64(pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.outputs_written);
-
- num_tcs_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.outputs_written); //tcs->outputs_written
- num_tcs_output_cp = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.tcs_vertices_out; //TCS VERTICES OUT
- num_tcs_patch_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.patch_outputs_written);
-
- /* Ensure that we only need one wave per SIMD so we don't need to check
- * resource usage. Also ensures that the number of tcs in and out
- * vertices per threadgroup are at most 256.
- */
- input_vertex_size = num_tcs_inputs * 16;
- output_vertex_size = num_tcs_outputs * 16;
-
- input_patch_size = num_tcs_input_cp * input_vertex_size;
-
- pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
- output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
- /* Ensure that we only need one wave per SIMD so we don't need to check
- * resource usage. Also ensures that the number of tcs in and out
- * vertices per threadgroup are at most 256.
- */
- num_patches = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) * 4;
-
- /* Make sure that the data fits in LDS. This assumes the shaders only
- * use LDS for the inputs and outputs.
- */
- hardware_lds_size = pipeline->device->physical_device->rad_info.chip_class >= CIK ? 65536 : 32768;
- num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
-
- /* Make sure the output data fits in the offchip buffer */
- num_patches = MIN2(num_patches,
- (pipeline->device->tess_offchip_block_dw_size * 4) /
- output_patch_size);
-
- /* Not necessary for correctness, but improves performance. The
- * specific value is taken from the proprietary driver.
- */
- num_patches = MIN2(num_patches, 40);
-
- /* SI bug workaround - limit LS-HS threadgroups to only one wave. */
- if (pipeline->device->physical_device->rad_info.chip_class == SI) {
- unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
- num_patches = MIN2(num_patches, one_wave);
- }
-
- output_patch0_offset = input_patch_size * num_patches;
- perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
-
- lds_size = output_patch0_offset + output_patch_size * num_patches;
-
- if (pipeline->device->physical_device->rad_info.chip_class >= CIK) {
- assert(lds_size <= 65536);
- lds_size = align(lds_size, 512) / 512;
- } else {
- assert(lds_size <= 32768);
- lds_size = align(lds_size, 256) / 256;
- }
- si_multiwave_lds_size_workaround(pipeline->device, &lds_size);
-
- tess->lds_size = lds_size;
-
- tess->tcs_in_layout = (input_patch_size / 4) |
- ((input_vertex_size / 4) << 13);
- tess->tcs_out_layout = (output_patch_size / 4) |
- ((output_vertex_size / 4) << 13);
- tess->tcs_out_offsets = (output_patch0_offset / 16) |
- ((perpatch_output_offset / 16) << 16);
- tess->offchip_layout = (pervertex_output_patch_size * num_patches << 16) |
- (num_tcs_output_cp << 9) | num_patches;
-
- tess->ls_hs_config = S_028B58_NUM_PATCHES(num_patches) |
- S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) |
- S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
- tess->num_patches = num_patches;
- tess->num_tcs_input_cp = num_tcs_input_cp;
-
- struct radv_shader_variant *tes = pipeline->shaders[MESA_SHADER_TESS_EVAL];
- unsigned type = 0, partitioning = 0, topology = 0, distribution_mode = 0;
-
- switch (tes->info.tes.primitive_mode) {
- case GL_TRIANGLES:
- type = V_028B6C_TESS_TRIANGLE;
- break;
- case GL_QUADS:
- type = V_028B6C_TESS_QUAD;
- break;
- case GL_ISOLINES:
- type = V_028B6C_TESS_ISOLINE;
- break;
- }
-
- switch (tes->info.tes.spacing) {
- case TESS_SPACING_EQUAL:
- partitioning = V_028B6C_PART_INTEGER;
- break;
- case TESS_SPACING_FRACTIONAL_ODD:
- partitioning = V_028B6C_PART_FRAC_ODD;
- break;
- case TESS_SPACING_FRACTIONAL_EVEN:
- partitioning = V_028B6C_PART_FRAC_EVEN;
- break;
- default:
- break;
- }
-
- if (tes->info.tes.point_mode)
- topology = V_028B6C_OUTPUT_POINT;
- else if (tes->info.tes.primitive_mode == GL_ISOLINES)
- topology = V_028B6C_OUTPUT_LINE;
- else if (tes->info.tes.ccw)
- topology = V_028B6C_OUTPUT_TRIANGLE_CW;
- else
- topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
-
- if (pipeline->device->has_distributed_tess) {
- if (pipeline->device->physical_device->rad_info.family == CHIP_FIJI ||
- pipeline->device->physical_device->rad_info.family >= CHIP_POLARIS10)
- distribution_mode = V_028B6C_DISTRIBUTION_MODE_TRAPEZOIDS;
- else
- distribution_mode = V_028B6C_DISTRIBUTION_MODE_DONUTS;
- } else
- distribution_mode = V_028B6C_DISTRIBUTION_MODE_NO_DIST;
-
- tess->tf_param = S_028B6C_TYPE(type) |
- S_028B6C_PARTITIONING(partitioning) |
- S_028B6C_TOPOLOGY(topology) |
- S_028B6C_DISTRIBUTION_MODE(distribution_mode);
-}
-
-static const struct radv_prim_vertex_count prim_size_table[] = {
- [V_008958_DI_PT_NONE] = {0, 0},
- [V_008958_DI_PT_POINTLIST] = {1, 1},
- [V_008958_DI_PT_LINELIST] = {2, 2},
- [V_008958_DI_PT_LINESTRIP] = {2, 1},
- [V_008958_DI_PT_TRILIST] = {3, 3},
- [V_008958_DI_PT_TRIFAN] = {3, 1},
- [V_008958_DI_PT_TRISTRIP] = {3, 1},
- [V_008958_DI_PT_LINELIST_ADJ] = {4, 4},
- [V_008958_DI_PT_LINESTRIP_ADJ] = {4, 1},
- [V_008958_DI_PT_TRILIST_ADJ] = {6, 6},
- [V_008958_DI_PT_TRISTRIP_ADJ] = {6, 2},
- [V_008958_DI_PT_RECTLIST] = {3, 3},
- [V_008958_DI_PT_LINELOOP] = {2, 1},
- [V_008958_DI_PT_POLYGON] = {3, 1},
- [V_008958_DI_PT_2D_TRI_STRIP] = {0, 0},
-};
-
-static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs)
-{
- unsigned gs_max_vert_out = gs->info.gs.vertices_out;
- unsigned cut_mode;
-
- if (gs_max_vert_out <= 128) {
- cut_mode = V_028A40_GS_CUT_128;
- } else if (gs_max_vert_out <= 256) {
- cut_mode = V_028A40_GS_CUT_256;
- } else if (gs_max_vert_out <= 512) {
- cut_mode = V_028A40_GS_CUT_512;
- } else {
- assert(gs_max_vert_out <= 1024);
- cut_mode = V_028A40_GS_CUT_1024;
- }
-
- return S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
- S_028A40_CUT_MODE(cut_mode)|
- S_028A40_ES_WRITE_OPTIMIZE(1) |
- S_028A40_GS_WRITE_OPTIMIZE(1);
-}
-
-static void calculate_pa_cl_vs_out_cntl(struct radv_pipeline *pipeline)
-{
- struct radv_shader_variant *vs;
- vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : (radv_pipeline_has_tess(pipeline) ? pipeline->shaders[MESA_SHADER_TESS_EVAL] : pipeline->shaders[MESA_SHADER_VERTEX]);
-
- struct ac_vs_output_info *outinfo = &vs->info.vs.outinfo;
-
- unsigned clip_dist_mask, cull_dist_mask, total_mask;
- clip_dist_mask = outinfo->clip_dist_mask;
- cull_dist_mask = outinfo->cull_dist_mask;
- total_mask = clip_dist_mask | cull_dist_mask;
-
- bool misc_vec_ena = outinfo->writes_pointsize ||
- outinfo->writes_layer ||
- outinfo->writes_viewport_index;
- pipeline->graphics.pa_cl_vs_out_cntl =
- S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
- S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
- S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
- S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
- S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
- S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
- S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
- cull_dist_mask << 8 |
- clip_dist_mask;
-
-}
-static void calculate_ps_inputs(struct radv_pipeline *pipeline)
-{
- struct radv_shader_variant *ps, *vs;
- struct ac_vs_output_info *outinfo;
-
- ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : (radv_pipeline_has_tess(pipeline) ? pipeline->shaders[MESA_SHADER_TESS_EVAL] : pipeline->shaders[MESA_SHADER_VERTEX]);
-
- outinfo = &vs->info.vs.outinfo;
-
- unsigned ps_offset = 0;
- if (ps->info.fs.has_pcoord) {
- unsigned val;
- val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
- pipeline->graphics.ps_input_cntl[ps_offset] = val;
- ps_offset++;
- }
-
- if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) {
- unsigned vs_offset, flat_shade;
- unsigned val;
- vs_offset = outinfo->prim_id_output;
- flat_shade = true;
- val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
- pipeline->graphics.ps_input_cntl[ps_offset] = val;
- ++ps_offset;
- }
-
- if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) {
- unsigned vs_offset, flat_shade;
- unsigned val;
- vs_offset = outinfo->layer_output;
- flat_shade = true;
- val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
- pipeline->graphics.ps_input_cntl[ps_offset] = val;
- ++ps_offset;
- }
-
- for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
- unsigned vs_offset, flat_shade;
- unsigned val;
-
- if (!(ps->info.fs.input_mask & (1u << i)))
- continue;
-
- if (!(outinfo->export_mask & (1u << i))) {
- pipeline->graphics.ps_input_cntl[ps_offset] = S_028644_OFFSET(0x20);
- ++ps_offset;
- continue;
- }
-
- vs_offset = util_bitcount(outinfo->export_mask & ((1u << i) - 1));
- if (outinfo->prim_id_output != 0xffffffff) {
- if (vs_offset >= outinfo->prim_id_output)
- vs_offset++;
- }
- if (outinfo->layer_output != 0xffffffff) {
- if (vs_offset >= outinfo->layer_output)
- vs_offset++;
- }
- flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
-
- val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
- pipeline->graphics.ps_input_cntl[ps_offset] = val;
- ++ps_offset;
- }
-
- pipeline->graphics.ps_input_cntl_num = ps_offset;
-}
-
VkResult
radv_pipeline_init(struct radv_pipeline *pipeline,
struct radv_device *device,
@@ -1949,8 +1214,8 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
const VkAllocationCallbacks *alloc)
{
struct radv_shader_module fs_m = {0};
- VkResult result;
+ bool dump = getenv("RADV_DUMP_SHADERS");
if (alloc == NULL)
alloc = &device->alloc;
@@ -1968,62 +1233,24 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
radv_pipeline_init_blend_state(pipeline, pCreateInfo, extra);
+ /* */
if (modules[MESA_SHADER_VERTEX]) {
- bool as_es = false;
- bool as_ls = false;
- if (modules[MESA_SHADER_TESS_CTRL])
- as_ls = true;
- else if (modules[MESA_SHADER_GEOMETRY])
- as_es = true;
- union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, as_es, as_ls);
+ union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo);
pipeline->shaders[MESA_SHADER_VERTEX] =
radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_VERTEX],
pStages[MESA_SHADER_VERTEX]->pName,
MESA_SHADER_VERTEX,
pStages[MESA_SHADER_VERTEX]->pSpecializationInfo,
- pipeline->layout, &key);
+ pipeline->layout, &key, dump);
pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_VERTEX);
}
- if (modules[MESA_SHADER_GEOMETRY]) {
- union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, false, false);
-
- pipeline->shaders[MESA_SHADER_GEOMETRY] =
- radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_GEOMETRY],
- pStages[MESA_SHADER_GEOMETRY]->pName,
- MESA_SHADER_GEOMETRY,
- pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo,
- pipeline->layout, &key);
-
- pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_GEOMETRY);
-
- pipeline->graphics.vgt_gs_mode = si_vgt_gs_mode(pipeline->shaders[MESA_SHADER_GEOMETRY]);
- } else
- pipeline->graphics.vgt_gs_mode = 0;
-
- if (modules[MESA_SHADER_TESS_EVAL]) {
- assert(modules[MESA_SHADER_TESS_CTRL]);
-
- radv_tess_pipeline_compile(pipeline,
- cache,
- modules[MESA_SHADER_TESS_CTRL],
- modules[MESA_SHADER_TESS_EVAL],
- pStages[MESA_SHADER_TESS_CTRL]->pName,
- pStages[MESA_SHADER_TESS_EVAL]->pName,
- pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo,
- pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo,
- pipeline->layout,
- pCreateInfo->pTessellationState->patchControlPoints);
- pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_TESS_EVAL) |
- mesa_to_vk_shader_stage(MESA_SHADER_TESS_CTRL);
- }
-
if (!modules[MESA_SHADER_FRAGMENT]) {
nir_builder fs_b;
nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
- fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "noop_fs");
+ fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "noop_fs");
fs_m.nir = fs_b.shader;
modules[MESA_SHADER_FRAGMENT] = &fs_m;
}
@@ -2040,7 +1267,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
stage ? stage->pName : "main",
MESA_SHADER_FRAGMENT,
stage ? stage->pSpecializationInfo : NULL,
- pipeline->layout, &key);
+ pipeline->layout, &key, dump);
pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_FRAGMENT);
}
@@ -2051,95 +1278,12 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
radv_pipeline_init_raster_state(pipeline, pCreateInfo);
radv_pipeline_init_multisample_state(pipeline, pCreateInfo);
pipeline->graphics.prim = si_translate_prim(pCreateInfo->pInputAssemblyState->topology);
- pipeline->graphics.can_use_guardband = radv_prim_can_use_guardband(pCreateInfo->pInputAssemblyState->topology);
-
- if (radv_pipeline_has_gs(pipeline)) {
- pipeline->graphics.gs_out = si_conv_gl_prim_to_gs_out(pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim);
- pipeline->graphics.can_use_guardband = pipeline->graphics.gs_out == V_028A6C_OUTPRIM_TYPE_TRISTRIP;
- } else {
- pipeline->graphics.gs_out = si_conv_prim_to_gs_out(pCreateInfo->pInputAssemblyState->topology);
- }
+ pipeline->graphics.gs_out = si_conv_prim_to_gs_out(pCreateInfo->pInputAssemblyState->topology);
if (extra && extra->use_rectlist) {
pipeline->graphics.prim = V_008958_DI_PT_RECTLIST;
pipeline->graphics.gs_out = V_028A6C_OUTPRIM_TYPE_TRISTRIP;
- pipeline->graphics.can_use_guardband = true;
}
pipeline->graphics.prim_restart_enable = !!pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
- /* prim vertex count will need TESS changes */
- pipeline->graphics.prim_vertex_count = prim_size_table[pipeline->graphics.prim];
-
- /* Ensure that some export memory is always allocated, for two reasons:
- *
- * 1) Correctness: The hardware ignores the EXEC mask if no export
- * memory is allocated, so KILL and alpha test do not work correctly
- * without this.
- * 2) Performance: Every shader needs at least a NULL export, even when
- * it writes no color/depth output. The NULL export instruction
- * stalls without this setting.
- *
- * Don't add this to CB_SHADER_MASK.
- */
- struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- if (!pipeline->graphics.blend.spi_shader_col_format) {
- if (!ps->info.fs.writes_z &&
- !ps->info.fs.writes_stencil &&
- !ps->info.fs.writes_sample_mask)
- pipeline->graphics.blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
- }
-
- unsigned z_order;
- pipeline->graphics.db_shader_control = 0;
- if (ps->info.fs.early_fragment_test || !ps->info.fs.writes_memory)
- z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
- else
- z_order = V_02880C_LATE_Z;
-
- pipeline->graphics.db_shader_control =
- S_02880C_Z_EXPORT_ENABLE(ps->info.fs.writes_z) |
- S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.fs.writes_stencil) |
- S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) |
- S_02880C_MASK_EXPORT_ENABLE(ps->info.fs.writes_sample_mask) |
- S_02880C_Z_ORDER(z_order) |
- S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) |
- S_02880C_EXEC_ON_HIER_FAIL(ps->info.fs.writes_memory) |
- S_02880C_EXEC_ON_NOOP(ps->info.fs.writes_memory);
-
- pipeline->graphics.shader_z_format =
- ps->info.fs.writes_sample_mask ? V_028710_SPI_SHADER_32_ABGR :
- ps->info.fs.writes_stencil ? V_028710_SPI_SHADER_32_GR :
- ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R :
- V_028710_SPI_SHADER_ZERO;
-
- calculate_pa_cl_vs_out_cntl(pipeline);
- calculate_ps_inputs(pipeline);
-
- uint32_t stages = 0;
- if (radv_pipeline_has_tess(pipeline)) {
- stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
- S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1);
-
- if (radv_pipeline_has_gs(pipeline))
- stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |
- S_028B54_GS_EN(1) |
- S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
- else
- stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
- } else if (radv_pipeline_has_gs(pipeline))
- stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
- S_028B54_GS_EN(1) |
- S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
- pipeline->graphics.vgt_shader_stages_en = stages;
-
- if (radv_pipeline_has_gs(pipeline))
- calculate_gs_ring_sizes(pipeline);
-
- if (radv_pipeline_has_tess(pipeline)) {
- if (pipeline->graphics.prim == V_008958_DI_PT_PATCH) {
- pipeline->graphics.prim_vertex_count.min = pCreateInfo->pTessellationState->patchControlPoints;
- pipeline->graphics.prim_vertex_count.incr = 1;
- }
- calculate_tess_state(pipeline, pCreateInfo);
- }
const VkPipelineVertexInputStateCreateInfo *vi_info =
pCreateInfo->pVertexInputState;
@@ -2175,12 +1319,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
pipeline->binding_stride[desc->binding] = desc->stride;
}
- if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
- radv_dump_pipeline_stats(device, pipeline);
- }
-
- result = radv_pipeline_scratch_init(device, pipeline);
- return result;
+ return VK_SUCCESS;
}
VkResult
@@ -2206,7 +1345,7 @@ radv_graphics_pipeline_create(
result = radv_pipeline_init(pipeline, device, cache,
pCreateInfo, extra, pAllocator);
if (result != VK_SUCCESS) {
- radv_pipeline_destroy(device, pipeline, pAllocator);
+ vk_free2(&device->alloc, pAllocator, pipeline);
return result;
}
@@ -2227,18 +1366,20 @@ VkResult radv_CreateGraphicsPipelines(
unsigned i = 0;
for (; i < count; i++) {
- VkResult r;
- r = radv_graphics_pipeline_create(_device,
- pipelineCache,
- &pCreateInfos[i],
- NULL, pAllocator, &pPipelines[i]);
- if (r != VK_SUCCESS) {
- result = r;
- pPipelines[i] = VK_NULL_HANDLE;
+ result = radv_graphics_pipeline_create(_device,
+ pipelineCache,
+ &pCreateInfos[i],
+ NULL, pAllocator, &pPipelines[i]);
+ if (result != VK_SUCCESS) {
+ for (unsigned j = 0; j < i; j++) {
+ radv_DestroyPipeline(_device, pPipelines[j], pAllocator);
+ }
+
+ return result;
}
}
- return result;
+ return VK_SUCCESS;
}
static VkResult radv_compute_pipeline_create(
@@ -2252,7 +1393,7 @@ static VkResult radv_compute_pipeline_create(
RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
RADV_FROM_HANDLE(radv_shader_module, module, pCreateInfo->stage.module);
struct radv_pipeline *pipeline;
- VkResult result;
+ bool dump = getenv("RADV_DUMP_SHADERS");
pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -2268,20 +1409,9 @@ static VkResult radv_compute_pipeline_create(
pCreateInfo->stage.pName,
MESA_SHADER_COMPUTE,
pCreateInfo->stage.pSpecializationInfo,
- pipeline->layout, NULL);
-
-
- result = radv_pipeline_scratch_init(device, pipeline);
- if (result != VK_SUCCESS) {
- radv_pipeline_destroy(device, pipeline, pAllocator);
- return result;
- }
+ pipeline->layout, NULL, dump);
*pPipeline = radv_pipeline_to_handle(pipeline);
-
- if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
- radv_dump_pipeline_stats(device, pipeline);
- }
return VK_SUCCESS;
}
VkResult radv_CreateComputePipelines(
@@ -2296,15 +1426,17 @@ VkResult radv_CreateComputePipelines(
unsigned i = 0;
for (; i < count; i++) {
- VkResult r;
- r = radv_compute_pipeline_create(_device, pipelineCache,
- &pCreateInfos[i],
- pAllocator, &pPipelines[i]);
- if (r != VK_SUCCESS) {
- result = r;
- pPipelines[i] = VK_NULL_HANDLE;
+ result = radv_compute_pipeline_create(_device, pipelineCache,
+ &pCreateInfos[i],
+ pAllocator, &pPipelines[i]);
+ if (result != VK_SUCCESS) {
+ for (unsigned j = 0; j < i; j++) {
+ radv_DestroyPipeline(_device, pPipelines[j], pAllocator);
+ }
+
+ return result;
}
}
- return result;
+ return VK_SUCCESS;
}
diff --git a/lib/mesa/src/amd/vulkan/radv_pipeline_cache.c b/lib/mesa/src/amd/vulkan/radv_pipeline_cache.c
index 5f6355f0d..b42935554 100644
--- a/lib/mesa/src/amd/vulkan/radv_pipeline_cache.c
+++ b/lib/mesa/src/amd/vulkan/radv_pipeline_cache.c
@@ -57,7 +57,7 @@ radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
/* We don't consider allocation failure fatal, we just start with a 0-sized
* cache. */
if (cache->hash_table == NULL ||
- (device->debug_flags & RADV_DEBUG_NO_CACHE))
+ !env_var_as_boolean("RADV_ENABLE_PIPELINE_CACHE", true))
cache->table_size = 0;
else
memset(cache->hash_table, 0, byte_size);
@@ -88,25 +88,23 @@ radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
const char *entrypoint,
const VkSpecializationInfo *spec_info,
const struct radv_pipeline_layout *layout,
- const union ac_shader_variant_key *key,
- uint32_t is_geom_copy_shader)
+ const union ac_shader_variant_key *key)
{
- struct mesa_sha1 ctx;
+ struct mesa_sha1 *ctx;
- _mesa_sha1_init(&ctx);
+ ctx = _mesa_sha1_init();
if (key)
- _mesa_sha1_update(&ctx, key, sizeof(*key));
- _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
- _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
+ _mesa_sha1_update(ctx, key, sizeof(*key));
+ _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
+ _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
if (layout)
- _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
+ _mesa_sha1_update(ctx, layout->sha1, sizeof(layout->sha1));
if (spec_info) {
- _mesa_sha1_update(&ctx, spec_info->pMapEntries,
+ _mesa_sha1_update(ctx, spec_info->pMapEntries,
spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
- _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
+ _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
}
- _mesa_sha1_update(&ctx, &is_geom_copy_shader, 4);
- _mesa_sha1_final(&ctx, hash);
+ _mesa_sha1_final(ctx, hash);
}
@@ -152,10 +150,7 @@ radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
struct radv_pipeline_cache *cache,
const unsigned char *sha1)
{
- struct cache_entry *entry = NULL;
-
- if (cache)
- entry = radv_pipeline_cache_search(cache, sha1);
+ struct cache_entry *entry = radv_pipeline_cache_search(cache, sha1);
if (!entry)
return NULL;
@@ -174,7 +169,7 @@ radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
variant->ref_count = 1;
variant->bo = device->ws->buffer_create(device->ws, entry->code_size, 256,
- RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
+ RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
void *ptr = device->ws->buffer_map(variant->bo);
memcpy(ptr, entry->code, entry->code_size);
@@ -263,9 +258,6 @@ radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
struct radv_shader_variant *variant,
const void *code, unsigned code_size)
{
- if (!cache)
- return variant;
-
pthread_mutex_lock(&cache->mutex);
struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
if (entry) {
@@ -311,13 +303,13 @@ struct cache_header {
uint32_t device_id;
uint8_t uuid[VK_UUID_SIZE];
};
-
void
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
const void *data, size_t size)
{
struct radv_device *device = cache->device;
struct cache_header header;
+ uint8_t uuid[VK_UUID_SIZE];
if (size < sizeof(header))
return;
@@ -328,9 +320,10 @@ radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
return;
if (header.vendor_id != 0x1002)
return;
- if (header.device_id != device->physical_device->rad_info.pci_id)
+ if (header.device_id != device->instance->physicalDevice.rad_info.pci_id)
return;
- if (memcmp(header.uuid, device->physical_device->uuid, VK_UUID_SIZE) != 0)
+ radv_device_get_cache_uuid(uuid);
+ if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
return;
char *end = (void *) data + size;
@@ -428,8 +421,8 @@ VkResult radv_GetPipelineCacheData(
header->header_size = sizeof(*header);
header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
header->vendor_id = 0x1002;
- header->device_id = device->physical_device->rad_info.pci_id;
- memcpy(header->uuid, device->physical_device->uuid, VK_UUID_SIZE);
+ header->device_id = device->instance->physicalDevice.rad_info.pci_id;
+ radv_device_get_cache_uuid(header->uuid);
p += header->header_size;
struct cache_entry *entry;
diff --git a/lib/mesa/src/amd/vulkan/radv_private.h b/lib/mesa/src/amd/vulkan/radv_private.h
index 08f53a169..cfdda3654 100644
--- a/lib/mesa/src/amd/vulkan/radv_private.h
+++ b/lib/mesa/src/amd/vulkan/radv_private.h
@@ -53,7 +53,6 @@
#include "radv_radeon_winsys.h"
#include "ac_binary.h"
#include "ac_nir_to_llvm.h"
-#include "radv_debug.h"
#include "radv_descriptor_set.h"
#include <llvm-c/TargetMachine.h>
@@ -79,29 +78,14 @@ typedef uint32_t xcb_window_t;
#define MAX_VIEWPORTS 16
#define MAX_SCISSORS 16
#define MAX_PUSH_CONSTANTS_SIZE 128
-#define MAX_PUSH_DESCRIPTORS 32
#define MAX_DYNAMIC_BUFFERS 16
-#define MAX_SAMPLES_LOG2 4
+#define MAX_IMAGES 8
+#define MAX_SAMPLES_LOG2 4 /* SKL supports 16 samples */
#define NUM_META_FS_KEYS 11
-#define RADV_MAX_DRM_DEVICES 8
#define NUM_DEPTH_CLEAR_PIPELINES 3
-enum radv_mem_heap {
- RADV_MEM_HEAP_VRAM,
- RADV_MEM_HEAP_VRAM_CPU_ACCESS,
- RADV_MEM_HEAP_GTT,
- RADV_MEM_HEAP_COUNT
-};
-
-enum radv_mem_type {
- RADV_MEM_TYPE_VRAM,
- RADV_MEM_TYPE_GTT_WRITE_COMBINE,
- RADV_MEM_TYPE_VRAM_CPU_ACCESS,
- RADV_MEM_TYPE_GTT_CACHED,
- RADV_MEM_TYPE_COUNT
-};
-
+#define radv_noreturn __attribute__((__noreturn__))
#define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
static inline uint32_t
@@ -189,12 +173,20 @@ radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
__dword &= ~(1 << (b)))
#define typed_memcpy(dest, src, count) ({ \
- STATIC_ASSERT(sizeof(*src) == sizeof(*dest)); \
+ static_assert(sizeof(*src) == sizeof(*dest), ""); \
memcpy((dest), (src), (count) * sizeof(*(src))); \
})
#define zero(x) (memset(&(x), 0, sizeof(x)))
+/* Define no kernel as 1, since that's an illegal offset for a kernel */
+#define NO_KERNEL 1
+
+struct radv_common {
+ VkStructureType sType;
+ const void* pNext;
+};
+
/* Whenever we generate an error, pass it through this function. Useful for
* debugging, where we can break on it. Only call at error site, not when
* propagating errors. Might be useful to plug in a stack trace here.
@@ -219,13 +211,7 @@ void radv_loge_v(const char *format, va_list va);
* Print a FINISHME message, including its source location.
*/
#define radv_finishme(format, ...) \
- do { \
- static bool reported = false; \
- if (!reported) { \
- __radv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
- reported = true; \
- } \
- } while (0)
+ __radv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__);
/* A non-fatal assert. Useful for debugging. */
#ifdef DEBUG
@@ -237,6 +223,9 @@ void radv_loge_v(const char *format, va_list va);
#define radv_assert(x)
#endif
+void radv_abortf(const char *format, ...) radv_noreturn radv_printflike(1, 2);
+void radv_abortfv(const char *format, va_list va) radv_noreturn;
+
#define stub_return(v) \
do { \
radv_finishme("stub %s", __func__); \
@@ -249,12 +238,10 @@ void radv_loge_v(const char *format, va_list va);
return; \
} while (0)
+void *radv_resolve_entrypoint(uint32_t index);
void *radv_lookup_entrypoint(const char *name);
-struct radv_extensions {
- VkExtensionProperties *ext_array;
- uint32_t num_ext;
-};
+extern struct radv_dispatch_table dtable;
struct radv_physical_device {
VK_LOADER_DATA _loader_data;
@@ -263,13 +250,15 @@ struct radv_physical_device {
struct radeon_winsys *ws;
struct radeon_info rad_info;
+ uint32_t chipset_id;
char path[20];
const char * name;
- uint8_t uuid[VK_UUID_SIZE];
+ uint64_t aperture_size;
+ int cmd_parser_version;
+ uint32_t pci_vendor_id;
+ uint32_t pci_device_id;
- int local_fd;
struct wsi_device wsi_device;
- struct radv_extensions extensions;
};
struct radv_instance {
@@ -279,9 +268,7 @@ struct radv_instance {
uint32_t apiVersion;
int physicalDeviceCount;
- struct radv_physical_device physicalDevices[RADV_MAX_DRM_DEVICES];
-
- uint64_t debug_flags;
+ struct radv_physical_device physicalDevice;
};
VkResult radv_init_wsi(struct radv_physical_device *physical_device);
@@ -337,9 +324,11 @@ struct radv_meta_state {
VkRenderPass render_pass[NUM_META_FS_KEYS];
struct radv_pipeline *color_pipelines[NUM_META_FS_KEYS];
- VkRenderPass depthstencil_rp;
+ VkRenderPass depth_only_rp[NUM_DEPTH_CLEAR_PIPELINES];
struct radv_pipeline *depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ VkRenderPass stencil_only_rp[NUM_DEPTH_CLEAR_PIPELINES];
struct radv_pipeline *stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ VkRenderPass depthstencil_rp[NUM_DEPTH_CLEAR_PIPELINES];
struct radv_pipeline *depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
} clear[1 + MAX_SAMPLES_LOG2];
@@ -393,16 +382,6 @@ struct radv_meta_state {
VkDescriptorSetLayout img_ds_layout;
VkPipeline pipeline;
} btoi;
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline;
- } itoi;
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline;
- } cleari;
struct {
VkPipeline pipeline;
@@ -438,47 +417,14 @@ struct radv_meta_state {
VkPipeline fill_pipeline;
VkPipeline copy_pipeline;
} buffer;
-
- struct {
- VkDescriptorSetLayout ds_layout;
- VkPipelineLayout p_layout;
- VkPipeline occlusion_query_pipeline;
- VkPipeline pipeline_statistics_query_pipeline;
- } query;
};
-/* queue types */
-#define RADV_QUEUE_GENERAL 0
-#define RADV_QUEUE_COMPUTE 1
-#define RADV_QUEUE_TRANSFER 2
-
-#define RADV_MAX_QUEUE_FAMILIES 3
-
-enum ring_type radv_queue_family_to_ring(int f);
-
struct radv_queue {
VK_LOADER_DATA _loader_data;
+
struct radv_device * device;
- struct radeon_winsys_ctx *hw_ctx;
- int queue_family_index;
- int queue_idx;
-
- uint32_t scratch_size;
- uint32_t compute_scratch_size;
- uint32_t esgs_ring_size;
- uint32_t gsvs_ring_size;
- bool has_tess_rings;
- bool has_sample_positions;
-
- struct radeon_winsys_bo *scratch_bo;
- struct radeon_winsys_bo *descriptor_bo;
- struct radeon_winsys_bo *compute_scratch_bo;
- struct radeon_winsys_bo *esgs_ring_bo;
- struct radeon_winsys_bo *gsvs_ring_bo;
- struct radeon_winsys_bo *tess_factor_ring_bo;
- struct radeon_winsys_bo *tess_offchip_ring_bo;
- struct radeon_winsys_cs *initial_preamble_cs;
- struct radeon_winsys_cs *continue_preamble_cs;
+
+ struct radv_state_pool * pool;
};
struct radv_device {
@@ -488,22 +434,14 @@ struct radv_device {
struct radv_instance * instance;
struct radeon_winsys *ws;
+ struct radeon_winsys_ctx *hw_ctx;
struct radv_meta_state meta_state;
+ struct radv_queue queue;
+ struct radeon_winsys_cs *empty_cs;
- struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
- int queue_count[RADV_MAX_QUEUE_FAMILIES];
- struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES];
- struct radeon_winsys_cs *flush_cs[RADV_MAX_QUEUE_FAMILIES];
- struct radeon_winsys_cs *flush_shader_cs[RADV_MAX_QUEUE_FAMILIES];
- uint64_t debug_flags;
-
- bool llvm_supports_spill;
- bool has_distributed_tess;
- uint32_t tess_offchip_block_dw_size;
- uint32_t scratch_waves;
-
- uint32_t gs_table_depth;
+ bool allow_fast_clears;
+ bool allow_dcc;
/* MSAA sample locations.
* The first index is the sample index.
@@ -513,25 +451,12 @@ struct radv_device {
float sample_locations_4x[4][2];
float sample_locations_8x[8][2];
float sample_locations_16x[16][2];
-
- /* CIK and later */
- uint32_t gfx_init_size_dw;
- struct radeon_winsys_bo *gfx_init;
-
- struct radeon_winsys_bo *trace_bo;
- uint32_t *trace_id_ptr;
-
- struct radv_physical_device *physical_device;
-
- /* Backup in-memory cache to be used if the app doesn't provide one */
- struct radv_pipeline_cache * mem_cache;
};
+void radv_device_get_cache_uuid(void *uuid);
+
struct radv_device_memory {
struct radeon_winsys_bo *bo;
- /* for dedicated allocations */
- struct radv_image *image;
- struct radv_buffer *buffer;
uint32_t type_index;
VkDeviceSize map_size;
void * map;
@@ -545,62 +470,35 @@ struct radv_descriptor_range {
struct radv_descriptor_set {
const struct radv_descriptor_set_layout *layout;
+ struct list_head descriptor_pool;
uint32_t size;
+ struct radv_buffer_view *buffer_views;
struct radeon_winsys_bo *bo;
uint64_t va;
uint32_t *mapped_ptr;
struct radv_descriptor_range *dynamic_descriptors;
-
- struct list_head vram_list;
-
struct radeon_winsys_bo *descriptors[0];
};
-struct radv_push_descriptor_set
-{
- struct radv_descriptor_set set;
- uint32_t capacity;
+struct radv_descriptor_pool_free_node {
+ int next;
+ uint32_t offset;
+ uint32_t size;
};
struct radv_descriptor_pool {
+ struct list_head descriptor_sets;
+
struct radeon_winsys_bo *bo;
uint8_t *mapped_ptr;
uint64_t current_offset;
uint64_t size;
- struct list_head vram_list;
-};
-
-struct radv_descriptor_update_template_entry {
- VkDescriptorType descriptor_type;
-
- /* The number of descriptors to update */
- uint32_t descriptor_count;
-
- /* Into mapped_ptr or dynamic_descriptors, in units of the respective array */
- uint32_t dst_offset;
-
- /* In dwords. Not valid/used for dynamic descriptors */
- uint32_t dst_stride;
-
- uint32_t buffer_offset;
- uint32_t buffer_count;
-
- /* Only valid for combined image samplers and samplers */
- uint16_t has_sampler;
-
- /* In bytes */
- size_t src_offset;
- size_t src_stride;
-
- /* For push descriptors */
- const uint32_t *immutable_samplers;
-};
-
-struct radv_descriptor_update_template {
- uint32_t entry_count;
- struct radv_descriptor_update_template_entry entry[0];
+ int free_list;
+ int full_list;
+ uint32_t max_sets;
+ struct radv_descriptor_pool_free_node free_nodes[];
};
struct radv_buffer {
@@ -608,7 +506,6 @@ struct radv_buffer {
VkDeviceSize size;
VkBufferUsageFlags usage;
- VkBufferCreateFlags flags;
/* Set when bound */
struct radeon_winsys_bo * bo;
@@ -641,18 +538,16 @@ enum radv_cmd_flush_bits {
RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2,
/* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3,
- /* Same as above, but only writes back and doesn't invalidate */
- RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4,
/* Framebuffer caches */
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5,
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6,
- RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7,
- RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8,
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 4,
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 5,
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 6,
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 7,
/* Engine synchronization. */
- RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9,
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10,
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11,
- RADV_CMD_FLAG_VGT_FLUSH = 1 << 12,
+ RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 8,
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 9,
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 10,
+ RADV_CMD_FLAG_VGT_FLUSH = 1 << 11,
RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
@@ -725,9 +620,8 @@ struct radv_attachment_state {
struct radv_cmd_state {
uint32_t vb_dirty;
- radv_cmd_dirty_mask_t dirty;
bool vertex_descriptors_dirty;
- bool push_descriptors_dirty;
+ radv_cmd_dirty_mask_t dirty;
struct radv_pipeline * pipeline;
struct radv_pipeline * emitted_pipeline;
@@ -744,21 +638,14 @@ struct radv_cmd_state {
struct radv_buffer * index_buffer;
uint32_t index_type;
uint32_t index_offset;
- int32_t last_primitive_reset_en;
uint32_t last_primitive_reset_index;
enum radv_cmd_flush_bits flush_bits;
unsigned active_occlusion_queries;
float offset_scale;
- uint32_t descriptors_dirty;
- uint32_t trace_id;
- uint32_t last_ia_multi_vgt_param;
};
-
struct radv_cmd_pool {
VkAllocationCallbacks alloc;
struct list_head cmd_buffers;
- struct list_head free_cmd_buffers;
- uint32_t queue_family_index;
};
struct radv_cmd_buffer_upload {
@@ -781,53 +668,25 @@ struct radv_cmd_buffer {
VkCommandBufferLevel level;
struct radeon_winsys_cs *cs;
struct radv_cmd_state state;
- uint32_t queue_family_index;
uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
- uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
+ uint32_t dynamic_buffers[16 * MAX_DYNAMIC_BUFFERS];
VkShaderStageFlags push_constant_stages;
- struct radv_push_descriptor_set push_descriptors;
- struct radv_descriptor_set meta_push_descriptors;
struct radv_cmd_buffer_upload upload;
bool record_fail;
-
- uint32_t scratch_size_needed;
- uint32_t compute_scratch_size_needed;
- uint32_t esgs_ring_size_needed;
- uint32_t gsvs_ring_size_needed;
- bool tess_rings_needed;
- bool sample_positions_needed;
-
- int ring_offsets_idx; /* just used for verification */
};
struct radv_image;
-bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);
-
-void si_init_compute(struct radv_cmd_buffer *cmd_buffer);
-void si_init_config(struct radv_cmd_buffer *cmd_buffer);
-
-void cik_create_gfx_config(struct radv_device *device);
-
+void si_init_config(struct radv_physical_device *physical_device,
+ struct radv_cmd_buffer *cmd_buffer);
void si_write_viewport(struct radeon_winsys_cs *cs, int first_vp,
int count, const VkViewport *viewports);
void si_write_scissors(struct radeon_winsys_cs *cs, int first,
- int count, const VkRect2D *scissors,
- const VkViewport *viewports, bool can_use_guardband);
-uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
- bool instanced_draw, bool indirect_draw,
- uint32_t draw_vertex_count);
-void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
- enum chip_class chip_class,
- bool is_mec,
- enum radv_cmd_flush_bits flush_bits);
-void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
- enum chip_class chip_class,
- bool is_mec,
- enum radv_cmd_flush_bits flush_bits);
+ int count, const VkRect2D *scissors);
+uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer);
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
uint64_t src_va, uint64_t dest_va,
@@ -870,10 +729,7 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer,
void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
struct radeon_winsys_bo *bo,
uint64_t offset, uint64_t size, uint32_t value);
-void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
-bool radv_get_memory_fd(struct radv_device *device,
- struct radv_device_memory *memory,
- int *pFD);
+
/*
* Takes x,y,z as exact numbers of invocations, instead of blocks.
*
@@ -907,8 +763,7 @@ radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
const char *entrypoint,
const VkSpecializationInfo *spec_info,
const struct radv_pipeline_layout *layout,
- const union ac_shader_variant_key *key,
- uint32_t is_geom_copy_shader);
+ const union ac_shader_variant_key *key);
static inline gl_shader_stage
vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
@@ -939,7 +794,6 @@ struct radv_shader_variant {
struct ac_shader_variant_info info;
unsigned rsrc1;
unsigned rsrc2;
- uint32_t code_size;
};
struct radv_depth_stencil_state {
@@ -964,6 +818,7 @@ unsigned radv_format_meta_fs_key(VkFormat format);
struct radv_raster_state {
uint32_t pa_cl_clip_cntl;
+ uint32_t pa_cl_vs_out_cntl;
uint32_t spi_interp_control;
uint32_t pa_su_point_size;
uint32_t pa_su_point_minmax;
@@ -982,23 +837,6 @@ struct radv_multisample_state {
unsigned num_samples;
};
-struct radv_prim_vertex_count {
- uint8_t min;
- uint8_t incr;
-};
-
-struct radv_tessellation_state {
- uint32_t ls_hs_config;
- uint32_t tcs_in_layout;
- uint32_t tcs_out_layout;
- uint32_t tcs_out_offsets;
- uint32_t offchip_layout;
- unsigned num_patches;
- unsigned lds_size;
- unsigned num_tcs_input_cp;
- uint32_t tf_param;
-};
-
struct radv_pipeline {
struct radv_device * device;
uint32_t dynamic_state_mask;
@@ -1009,7 +847,6 @@ struct radv_pipeline {
bool needs_data_cache;
struct radv_shader_variant * shaders[MESA_SHADER_STAGES];
- struct radv_shader_variant *gs_copy_shader;
VkShaderStageFlags active_stages;
uint32_t va_rsrc_word3[MAX_VERTEX_ATTRIBS];
@@ -1025,38 +862,13 @@ struct radv_pipeline {
struct radv_depth_stencil_state ds;
struct radv_raster_state raster;
struct radv_multisample_state ms;
- struct radv_tessellation_state tess;
- uint32_t db_shader_control;
- uint32_t shader_z_format;
unsigned prim;
unsigned gs_out;
- uint32_t vgt_gs_mode;
bool prim_restart_enable;
- unsigned esgs_ring_size;
- unsigned gsvs_ring_size;
- uint32_t ps_input_cntl[32];
- uint32_t ps_input_cntl_num;
- uint32_t pa_cl_vs_out_cntl;
- uint32_t vgt_shader_stages_en;
- struct radv_prim_vertex_count prim_vertex_count;
- bool can_use_guardband;
} graphics;
};
-
- unsigned max_waves;
- unsigned scratch_bytes_per_wave;
};
-static inline bool radv_pipeline_has_gs(struct radv_pipeline *pipeline)
-{
- return pipeline->shaders[MESA_SHADER_GEOMETRY] ? true : false;
-}
-
-static inline bool radv_pipeline_has_tess(struct radv_pipeline *pipeline)
-{
- return pipeline->shaders[MESA_SHADER_TESS_EVAL] ? true : false;
-}
-
struct radv_graphics_pipeline_create_info {
bool use_rectlist;
bool db_depth_clear;
@@ -1121,6 +933,10 @@ struct radv_cmask_info {
uint64_t offset;
uint64_t size;
unsigned alignment;
+ unsigned pitch;
+ unsigned height;
+ unsigned xalign;
+ unsigned yalign;
unsigned slice_tile_max;
unsigned base_address_reg;
};
@@ -1147,24 +963,22 @@ struct radv_image {
uint32_t samples; /**< VkImageCreateInfo::samples */
VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
VkImageTiling tiling; /** VkImageCreateInfo::tiling */
- VkImageCreateFlags flags; /** VkImageCreateInfo::flags */
VkDeviceSize size;
uint32_t alignment;
- bool exclusive;
- unsigned queue_family_mask;
-
/* Set when bound */
struct radeon_winsys_bo *bo;
VkDeviceSize offset;
uint32_t dcc_offset;
- uint32_t htile_offset;
struct radeon_surf surface;
struct radv_fmask_info fmask;
struct radv_cmask_info cmask;
uint32_t clear_value_offset;
+
+ /* Depth buffer compression and fast clear. */
+ struct r600_htile_info htile;
};
bool radv_layout_has_htile(const struct radv_image *image,
@@ -1173,13 +987,8 @@ bool radv_layout_is_htile_compressed(const struct radv_image *image,
VkImageLayout layout);
bool radv_layout_can_expclear(const struct radv_image *image,
VkImageLayout layout);
-bool radv_layout_can_fast_clear(const struct radv_image *image,
- VkImageLayout layout,
- unsigned queue_mask);
-
-
-unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family);
-
+bool radv_layout_has_cmask(const struct radv_image *image,
+ VkImageLayout layout);
static inline uint32_t
radv_get_layerCount(const struct radv_image *image,
const VkImageSubresourceRange *range)
@@ -1382,32 +1191,17 @@ struct radv_query_pool {
uint32_t availability_offset;
char *ptr;
VkQueryType type;
- uint32_t pipeline_stats_mask;
};
-void
-radv_update_descriptor_sets(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- VkDescriptorSet overrideSet,
- uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet *pDescriptorWrites,
- uint32_t descriptorCopyCount,
- const VkCopyDescriptorSet *pDescriptorCopies);
+VkResult
+radv_temp_descriptor_set_create(struct radv_device *device,
+ struct radv_cmd_buffer *cmd_buffer,
+ VkDescriptorSetLayout _layout,
+ VkDescriptorSet *_set);
void
-radv_update_descriptor_set_with_template(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- struct radv_descriptor_set *set,
- VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate,
- const void *pData);
-
-void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipelineLayout _layout,
- uint32_t set,
- uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet *pDescriptorWrites);
-
+radv_temp_descriptor_set_destroy(struct radv_device *device,
+ VkDescriptorSet _set);
void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image, uint32_t value);
void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
@@ -1419,8 +1213,6 @@ struct radv_fence {
bool signalled;
};
-struct radeon_winsys_sem;
-
#define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType) \
\
static inline struct __radv_type * \
@@ -1464,7 +1256,6 @@ RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, VkBufferView)
RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, VkDescriptorPool)
RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, VkDescriptorSet)
RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, VkDescriptorSetLayout)
-RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, VkDescriptorUpdateTemplateKHR)
RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, VkDeviceMemory)
RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_fence, VkFence)
RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_event, VkEvent)
@@ -1478,6 +1269,21 @@ RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, VkQueryPool)
RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, VkRenderPass)
RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, VkSampler)
RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_shader_module, VkShaderModule)
-RADV_DEFINE_NONDISP_HANDLE_CASTS(radeon_winsys_sem, VkSemaphore)
+
+#define RADV_DEFINE_STRUCT_CASTS(__radv_type, __VkType) \
+ \
+ static inline const __VkType * \
+ __radv_type ## _to_ ## __VkType(const struct __radv_type *__radv_obj) \
+ { \
+ return (const __VkType *) __radv_obj; \
+ }
+
+#define RADV_COMMON_TO_STRUCT(__VkType, __vk_name, __common_name) \
+ const __VkType *__vk_name = radv_common_to_ ## __VkType(__common_name)
+
+RADV_DEFINE_STRUCT_CASTS(radv_common, VkMemoryBarrier)
+RADV_DEFINE_STRUCT_CASTS(radv_common, VkBufferMemoryBarrier)
+RADV_DEFINE_STRUCT_CASTS(radv_common, VkImageMemoryBarrier)
+
#endif /* RADV_PRIVATE_H */
diff --git a/lib/mesa/src/amd/vulkan/radv_query.c b/lib/mesa/src/amd/vulkan/radv_query.c
index d581ea534..cce38e853 100644
--- a/lib/mesa/src/amd/vulkan/radv_query.c
+++ b/lib/mesa/src/amd/vulkan/radv_query.c
@@ -29,20 +29,19 @@
#include <unistd.h>
#include <fcntl.h>
-#include "nir/nir_builder.h"
-#include "radv_meta.h"
#include "radv_private.h"
#include "radv_cs.h"
#include "sid.h"
-
-static const int pipelinestat_block_size = 11 * 8;
-static const unsigned pipeline_statistics_indices[] = {7, 6, 3, 4, 5, 2, 1, 0, 8, 9, 10};
-
static unsigned get_max_db(struct radv_device *device)
{
- unsigned num_db = device->physical_device->rad_info.num_render_backends;
- MAYBE_UNUSED unsigned rb_mask = device->physical_device->rad_info.enabled_rb_mask;
+ unsigned num_db = device->instance->physicalDevice.rad_info.num_render_backends;
+ unsigned rb_mask = device->instance->physicalDevice.rad_info.enabled_rb_mask;
+
+ if (device->instance->physicalDevice.rad_info.chip_class == SI)
+ num_db = 8;
+ else
+ num_db = MAX2(8, num_db);
/* Otherwise we need to change the query reset procedure */
assert(rb_mask == ((1ull << num_db) - 1));
@@ -50,696 +49,6 @@ static unsigned get_max_db(struct radv_device *device)
return num_db;
}
-static void radv_break_on_count(nir_builder *b, nir_variable *var, nir_ssa_def *count)
-{
- nir_ssa_def *counter = nir_load_var(b, var);
-
- nir_if *if_stmt = nir_if_create(b->shader);
- if_stmt->condition = nir_src_for_ssa(nir_uge(b, counter, count));
- nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
-
- b->cursor = nir_after_cf_list(&if_stmt->then_list);
-
- nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_break);
- nir_builder_instr_insert(b, &instr->instr);
-
- b->cursor = nir_after_cf_node(&if_stmt->cf_node);
- counter = nir_iadd(b, counter, nir_imm_int(b, 1));
- nir_store_var(b, var, counter, 0x1);
-}
-
-static struct nir_ssa_def *
-radv_load_push_int(nir_builder *b, unsigned offset, const char *name)
-{
- nir_intrinsic_instr *flags = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
- flags->src[0] = nir_src_for_ssa(nir_imm_int(b, offset));
- flags->num_components = 1;
- nir_ssa_dest_init(&flags->instr, &flags->dest, 1, 32, name);
- nir_builder_instr_insert(b, &flags->instr);
- return &flags->dest.ssa;
-}
-
-static nir_shader *
-build_occlusion_query_shader(struct radv_device *device) {
- /* the shader this builds is roughly
- *
- * push constants {
- * uint32_t flags;
- * uint32_t dst_stride;
- * };
- *
- * uint32_t src_stride = 16 * db_count;
- *
- * location(binding = 0) buffer dst_buf;
- * location(binding = 1) buffer src_buf;
- *
- * void main() {
- * uint64_t result = 0;
- * uint64_t src_offset = src_stride * global_id.x;
- * uint64_t dst_offset = dst_stride * global_id.x;
- * bool available = true;
- * for (int i = 0; i < db_count; ++i) {
- * uint64_t start = src_buf[src_offset + 16 * i];
- * uint64_t end = src_buf[src_offset + 16 * i + 8];
- * if ((start & (1ull << 63)) && (end & (1ull << 63)))
- * result += end - start;
- * else
- * available = false;
- * }
- * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
- * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
- * if (flags & VK_QUERY_RESULT_64_BIT)
- * dst_buf[dst_offset] = result;
- * else
- * dst_buf[dst_offset] = (uint32_t)result.
- * }
- * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- * dst_buf[dst_offset + elem_size] = available;
- * }
- * }
- */
- nir_builder b;
- nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "occlusion_query");
- b.shader->info->cs.local_size[0] = 64;
- b.shader->info->cs.local_size[1] = 1;
- b.shader->info->cs.local_size[2] = 1;
-
- nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
- nir_variable *outer_counter = nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter");
- nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start");
- nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end");
- nir_variable *available = nir_local_variable_create(b.impl, glsl_int_type(), "available");
- unsigned db_count = get_max_db(device);
-
- nir_ssa_def *flags = radv_load_push_int(&b, 0, "flags");
-
- nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
- nir_intrinsic_vulkan_resource_index);
- dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
- nir_intrinsic_set_desc_set(dst_buf, 0);
- nir_intrinsic_set_binding(dst_buf, 0);
- nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
- nir_builder_instr_insert(&b, &dst_buf->instr);
-
- nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader,
- nir_intrinsic_vulkan_resource_index);
- src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
- nir_intrinsic_set_desc_set(src_buf, 0);
- nir_intrinsic_set_binding(src_buf, 1);
- nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);
- nir_builder_instr_insert(&b, &src_buf->instr);
-
- nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
- nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info->cs.local_size[0],
- b.shader->info->cs.local_size[1],
- b.shader->info->cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- global_id = nir_channel(&b, global_id, 0); // We only care about x here.
-
- nir_ssa_def *input_stride = nir_imm_int(&b, db_count * 16);
- nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
- nir_ssa_def *output_stride = radv_load_push_int(&b, 4, "output_stride");
- nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
-
-
- nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
- nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1);
- nir_store_var(&b, available, nir_imm_int(&b, 1), 0x1);
-
- nir_loop *outer_loop = nir_loop_create(b.shader);
- nir_builder_cf_insert(&b, &outer_loop->cf_node);
- b.cursor = nir_after_cf_list(&outer_loop->body);
-
- nir_ssa_def *current_outer_count = nir_load_var(&b, outer_counter);
- radv_break_on_count(&b, outer_counter, nir_imm_int(&b, db_count));
-
- nir_ssa_def *load_offset = nir_imul(&b, current_outer_count, nir_imm_int(&b, 16));
- load_offset = nir_iadd(&b, input_base, load_offset);
-
- nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
- load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
- load->src[1] = nir_src_for_ssa(load_offset);
- nir_ssa_dest_init(&load->instr, &load->dest, 2, 64, NULL);
- load->num_components = 2;
- nir_builder_instr_insert(&b, &load->instr);
-
- const unsigned swizzle0[] = {0,0,0,0};
- const unsigned swizzle1[] = {1,1,1,1};
- nir_store_var(&b, start, nir_swizzle(&b, &load->dest.ssa, swizzle0, 1, false), 0x1);
- nir_store_var(&b, end, nir_swizzle(&b, &load->dest.ssa, swizzle1, 1, false), 0x1);
-
- nir_ssa_def *start_done = nir_ilt(&b, nir_load_var(&b, start), nir_imm_int64(&b, 0));
- nir_ssa_def *end_done = nir_ilt(&b, nir_load_var(&b, end), nir_imm_int64(&b, 0));
-
- nir_if *update_if = nir_if_create(b.shader);
- update_if->condition = nir_src_for_ssa(nir_iand(&b, start_done, end_done));
- nir_cf_node_insert(b.cursor, &update_if->cf_node);
-
- b.cursor = nir_after_cf_list(&update_if->then_list);
-
- nir_store_var(&b, result,
- nir_iadd(&b, nir_load_var(&b, result),
- nir_isub(&b, nir_load_var(&b, end),
- nir_load_var(&b, start))), 0x1);
-
- b.cursor = nir_after_cf_list(&update_if->else_list);
-
- nir_store_var(&b, available, nir_imm_int(&b, 0), 0x1);
-
- b.cursor = nir_after_cf_node(&outer_loop->cf_node);
-
- /* Store the result if complete or if partial results have been requested. */
-
- nir_ssa_def *result_is_64bit = nir_iand(&b, flags,
- nir_imm_int(&b, VK_QUERY_RESULT_64_BIT));
- nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
-
- nir_if *store_if = nir_if_create(b.shader);
- store_if->condition = nir_src_for_ssa(nir_ior(&b, nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_PARTIAL_BIT)), nir_load_var(&b, available)));
- nir_cf_node_insert(b.cursor, &store_if->cf_node);
-
- b.cursor = nir_after_cf_list(&store_if->then_list);
-
- nir_if *store_64bit_if = nir_if_create(b.shader);
- store_64bit_if->condition = nir_src_for_ssa(result_is_64bit);
- nir_cf_node_insert(b.cursor, &store_64bit_if->cf_node);
-
- b.cursor = nir_after_cf_list(&store_64bit_if->then_list);
-
- nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
- store->src[0] = nir_src_for_ssa(nir_load_var(&b, result));
- store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
- store->src[2] = nir_src_for_ssa(output_base);
- nir_intrinsic_set_write_mask(store, 0x1);
- store->num_components = 1;
- nir_builder_instr_insert(&b, &store->instr);
-
- b.cursor = nir_after_cf_list(&store_64bit_if->else_list);
-
- store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
- store->src[0] = nir_src_for_ssa(nir_u2u32(&b, nir_load_var(&b, result)));
- store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
- store->src[2] = nir_src_for_ssa(output_base);
- nir_intrinsic_set_write_mask(store, 0x1);
- store->num_components = 1;
- nir_builder_instr_insert(&b, &store->instr);
-
- b.cursor = nir_after_cf_node(&store_if->cf_node);
-
- /* Store the availability bit if requested. */
-
- nir_if *availability_if = nir_if_create(b.shader);
- availability_if->condition = nir_src_for_ssa(nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)));
- nir_cf_node_insert(b.cursor, &availability_if->cf_node);
-
- b.cursor = nir_after_cf_list(&availability_if->then_list);
-
- store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
- store->src[0] = nir_src_for_ssa(nir_load_var(&b, available));
- store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
- store->src[2] = nir_src_for_ssa(nir_iadd(&b, result_size, output_base));
- nir_intrinsic_set_write_mask(store, 0x1);
- store->num_components = 1;
- nir_builder_instr_insert(&b, &store->instr);
-
- return b.shader;
-}
-
-static nir_shader *
-build_pipeline_statistics_query_shader(struct radv_device *device) {
- /* the shader this builds is roughly
- *
- * push constants {
- * uint32_t flags;
- * uint32_t dst_stride;
- * uint32_t stats_mask;
- * uint32_t avail_offset;
- * };
- *
- * uint32_t src_stride = pipelinestat_block_size * 2;
- *
- * location(binding = 0) buffer dst_buf;
- * location(binding = 1) buffer src_buf;
- *
- * void main() {
- * uint64_t src_offset = src_stride * global_id.x;
- * uint64_t dst_base = dst_stride * global_id.x;
- * uint64_t dst_offset = dst_base;
- * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
- * uint32_t elem_count = stats_mask >> 16;
- * uint32_t available = src_buf[avail_offset + 4 * global_id.x];
- * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- * dst_buf[dst_offset + elem_count * elem_size] = available;
- * }
- * if (available) {
- * // repeat 11 times:
- * if (stats_mask & (1 << 0)) {
- * uint64_t start = src_buf[src_offset + 8 * indices[0]];
- * uint64_t end = src_buf[src_offset + 8 * indices[0] + pipelinestat_block_size];
- * uint64_t result = end - start;
- * if (flags & VK_QUERY_RESULT_64_BIT)
- * dst_buf[dst_offset] = result;
- * else
- * dst_buf[dst_offset] = (uint32_t)result.
- * dst_offset += elem_size;
- * }
- * } else if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
- * // Set everything to 0 as we don't know what is valid.
- * for (int i = 0; i < elem_count; ++i)
- * dst_buf[dst_base + elem_size * i] = 0;
- * }
- * }
- */
- nir_builder b;
- nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "pipeline_statistics_query");
- b.shader->info->cs.local_size[0] = 64;
- b.shader->info->cs.local_size[1] = 1;
- b.shader->info->cs.local_size[2] = 1;
-
- nir_variable *output_offset = nir_local_variable_create(b.impl, glsl_int_type(), "output_offset");
-
- nir_ssa_def *flags = radv_load_push_int(&b, 0, "flags");
- nir_ssa_def *stats_mask = radv_load_push_int(&b, 8, "stats_mask");
- nir_ssa_def *avail_offset = radv_load_push_int(&b, 12, "avail_offset");
-
- nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
- nir_intrinsic_vulkan_resource_index);
- dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
- nir_intrinsic_set_desc_set(dst_buf, 0);
- nir_intrinsic_set_binding(dst_buf, 0);
- nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
- nir_builder_instr_insert(&b, &dst_buf->instr);
-
- nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader,
- nir_intrinsic_vulkan_resource_index);
- src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
- nir_intrinsic_set_desc_set(src_buf, 0);
- nir_intrinsic_set_binding(src_buf, 1);
- nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);
- nir_builder_instr_insert(&b, &src_buf->instr);
-
- nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
- nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info->cs.local_size[0],
- b.shader->info->cs.local_size[1],
- b.shader->info->cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- global_id = nir_channel(&b, global_id, 0); // We only care about x here.
-
- nir_ssa_def *input_stride = nir_imm_int(&b, pipelinestat_block_size * 2);
- nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
- nir_ssa_def *output_stride = radv_load_push_int(&b, 4, "output_stride");
- nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
-
-
- avail_offset = nir_iadd(&b, avail_offset,
- nir_imul(&b, global_id, nir_imm_int(&b, 4)));
-
- nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
- load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
- load->src[1] = nir_src_for_ssa(avail_offset);
- nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
- load->num_components = 1;
- nir_builder_instr_insert(&b, &load->instr);
- nir_ssa_def *available = &load->dest.ssa;
-
- nir_ssa_def *result_is_64bit = nir_iand(&b, flags,
- nir_imm_int(&b, VK_QUERY_RESULT_64_BIT));
- nir_ssa_def *elem_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
- nir_ssa_def *elem_count = nir_ushr(&b, stats_mask, nir_imm_int(&b, 16));
-
- /* Store the availability bit if requested. */
-
- nir_if *availability_if = nir_if_create(b.shader);
- availability_if->condition = nir_src_for_ssa(nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)));
- nir_cf_node_insert(b.cursor, &availability_if->cf_node);
-
- b.cursor = nir_after_cf_list(&availability_if->then_list);
-
- nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
- store->src[0] = nir_src_for_ssa(available);
- store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
- store->src[2] = nir_src_for_ssa(nir_iadd(&b, output_base, nir_imul(&b, elem_count, elem_size)));
- nir_intrinsic_set_write_mask(store, 0x1);
- store->num_components = 1;
- nir_builder_instr_insert(&b, &store->instr);
-
- b.cursor = nir_after_cf_node(&availability_if->cf_node);
-
- nir_if *available_if = nir_if_create(b.shader);
- available_if->condition = nir_src_for_ssa(available);
- nir_cf_node_insert(b.cursor, &available_if->cf_node);
-
- b.cursor = nir_after_cf_list(&available_if->then_list);
-
- nir_store_var(&b, output_offset, output_base, 0x1);
- for (int i = 0; i < 11; ++i) {
- nir_if *store_if = nir_if_create(b.shader);
- store_if->condition = nir_src_for_ssa(nir_iand(&b, stats_mask, nir_imm_int(&b, 1u << i)));
- nir_cf_node_insert(b.cursor, &store_if->cf_node);
-
- b.cursor = nir_after_cf_list(&store_if->then_list);
-
- load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
- load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
- load->src[1] = nir_src_for_ssa(nir_iadd(&b, input_base,
- nir_imm_int(&b, pipeline_statistics_indices[i] * 8)));
- nir_ssa_dest_init(&load->instr, &load->dest, 1, 64, NULL);
- load->num_components = 1;
- nir_builder_instr_insert(&b, &load->instr);
- nir_ssa_def *start = &load->dest.ssa;
-
- load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
- load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
- load->src[1] = nir_src_for_ssa(nir_iadd(&b, input_base,
- nir_imm_int(&b, pipeline_statistics_indices[i] * 8 + pipelinestat_block_size)));
- nir_ssa_dest_init(&load->instr, &load->dest, 1, 64, NULL);
- load->num_components = 1;
- nir_builder_instr_insert(&b, &load->instr);
- nir_ssa_def *end = &load->dest.ssa;
-
- nir_ssa_def *result = nir_isub(&b, end, start);
-
- /* Store result */
- nir_if *store_64bit_if = nir_if_create(b.shader);
- store_64bit_if->condition = nir_src_for_ssa(result_is_64bit);
- nir_cf_node_insert(b.cursor, &store_64bit_if->cf_node);
-
- b.cursor = nir_after_cf_list(&store_64bit_if->then_list);
-
- nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
- store->src[0] = nir_src_for_ssa(result);
- store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
- store->src[2] = nir_src_for_ssa(nir_load_var(&b, output_offset));
- nir_intrinsic_set_write_mask(store, 0x1);
- store->num_components = 1;
- nir_builder_instr_insert(&b, &store->instr);
-
- b.cursor = nir_after_cf_list(&store_64bit_if->else_list);
-
- store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
- store->src[0] = nir_src_for_ssa(nir_u2u32(&b, result));
- store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
- store->src[2] = nir_src_for_ssa(nir_load_var(&b, output_offset));
- nir_intrinsic_set_write_mask(store, 0x1);
- store->num_components = 1;
- nir_builder_instr_insert(&b, &store->instr);
-
- b.cursor = nir_after_cf_node(&store_64bit_if->cf_node);
-
- nir_store_var(&b, output_offset,
- nir_iadd(&b, nir_load_var(&b, output_offset),
- elem_size), 0x1);
-
- b.cursor = nir_after_cf_node(&store_if->cf_node);
- }
-
- b.cursor = nir_after_cf_list(&available_if->else_list);
-
- available_if = nir_if_create(b.shader);
- available_if->condition = nir_src_for_ssa(nir_iand(&b, flags,
- nir_imm_int(&b, VK_QUERY_RESULT_PARTIAL_BIT)));
- nir_cf_node_insert(b.cursor, &available_if->cf_node);
-
- b.cursor = nir_after_cf_list(&available_if->then_list);
-
- /* Stores zeros in all outputs. */
-
- nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter");
- nir_store_var(&b, counter, nir_imm_int(&b, 0), 0x1);
-
- nir_loop *loop = nir_loop_create(b.shader);
- nir_builder_cf_insert(&b, &loop->cf_node);
- b.cursor = nir_after_cf_list(&loop->body);
-
- nir_ssa_def *current_counter = nir_load_var(&b, counter);
- radv_break_on_count(&b, counter, elem_count);
-
- nir_ssa_def *output_elem = nir_iadd(&b, output_base,
- nir_imul(&b, elem_size, current_counter));
-
- nir_if *store_64bit_if = nir_if_create(b.shader);
- store_64bit_if->condition = nir_src_for_ssa(result_is_64bit);
- nir_cf_node_insert(b.cursor, &store_64bit_if->cf_node);
-
- b.cursor = nir_after_cf_list(&store_64bit_if->then_list);
-
- store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
- store->src[0] = nir_src_for_ssa(nir_imm_int64(&b, 0));
- store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
- store->src[2] = nir_src_for_ssa(output_elem);
- nir_intrinsic_set_write_mask(store, 0x1);
- store->num_components = 1;
- nir_builder_instr_insert(&b, &store->instr);
-
- b.cursor = nir_after_cf_list(&store_64bit_if->else_list);
-
- store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
- store->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
- store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
- store->src[2] = nir_src_for_ssa(output_elem);
- nir_intrinsic_set_write_mask(store, 0x1);
- store->num_components = 1;
- nir_builder_instr_insert(&b, &store->instr);
-
- b.cursor = nir_after_cf_node(&loop->cf_node);
- return b.shader;
-}
-
-VkResult radv_device_init_meta_query_state(struct radv_device *device)
-{
- VkResult result;
- struct radv_shader_module occlusion_cs = { .nir = NULL };
- struct radv_shader_module pipeline_statistics_cs = { .nir = NULL };
-
- zero(device->meta_state.query);
-
- occlusion_cs.nir = build_occlusion_query_shader(device);
- pipeline_statistics_cs.nir = build_pipeline_statistics_query_shader(device);
-
- VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &occlusion_ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.query.ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineLayoutCreateInfo occlusion_pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.query.ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &occlusion_pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.query.p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo occlusion_pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = radv_shader_module_to_handle(&occlusion_cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo occlusion_vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = occlusion_pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.query.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &occlusion_vk_pipeline_info, NULL,
- &device->meta_state.query.occlusion_query_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo pipeline_statistics_pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = radv_shader_module_to_handle(&pipeline_statistics_cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo pipeline_statistics_vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_statistics_pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.query.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &pipeline_statistics_vk_pipeline_info, NULL,
- &device->meta_state.query.pipeline_statistics_query_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- return VK_SUCCESS;
-fail:
- radv_device_finish_meta_query_state(device);
- ralloc_free(occlusion_cs.nir);
- ralloc_free(pipeline_statistics_cs.nir);
- return result;
-}
-
-void radv_device_finish_meta_query_state(struct radv_device *device)
-{
- if (device->meta_state.query.pipeline_statistics_query_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.query.pipeline_statistics_query_pipeline,
- &device->meta_state.alloc);
-
- if (device->meta_state.query.occlusion_query_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.query.occlusion_query_pipeline,
- &device->meta_state.alloc);
-
- if (device->meta_state.query.p_layout)
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- device->meta_state.query.p_layout,
- &device->meta_state.alloc);
-
- if (device->meta_state.query.ds_layout)
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.query.ds_layout,
- &device->meta_state.alloc);
-}
-
-static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
- VkPipeline pipeline,
- struct radeon_winsys_bo *src_bo,
- struct radeon_winsys_bo *dst_bo,
- uint64_t src_offset, uint64_t dst_offset,
- uint32_t src_stride, uint32_t dst_stride,
- uint32_t count, uint32_t flags,
- uint32_t pipeline_stats_mask, uint32_t avail_offset)
-{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_saved_compute_state saved_state;
-
- radv_meta_save_compute(&saved_state, cmd_buffer, 4);
-
- struct radv_buffer dst_buffer = {
- .bo = dst_bo,
- .offset = dst_offset,
- .size = dst_stride * count
- };
-
- struct radv_buffer src_buffer = {
- .bo = src_bo,
- .offset = src_offset,
- .size = MAX2(src_stride * count, avail_offset + 4 * count - src_offset)
- };
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.query.p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&dst_buffer),
- .offset = 0,
- .range = VK_WHOLE_SIZE
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&src_buffer),
- .offset = 0,
- .range = VK_WHOLE_SIZE
- }
- }
- });
-
- /* Encode the number of elements for easy access by the shader. */
- pipeline_stats_mask &= 0x7ff;
- pipeline_stats_mask |= util_bitcount(pipeline_stats_mask) << 16;
-
- avail_offset -= src_offset;
-
- struct {
- uint32_t flags;
- uint32_t dst_stride;
- uint32_t pipeline_stats_mask;
- uint32_t avail_offset;
- } push_constants = {
- flags,
- dst_stride,
- pipeline_stats_mask,
- avail_offset
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.query.p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
- &push_constants);
-
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 |
- RADV_CMD_FLAG_INV_VMEM_L1;
-
- if (flags & VK_QUERY_RESULT_WAIT_BIT)
- cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER;
-
- radv_unaligned_dispatch(cmd_buffer, count, 1, 1);
-
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
-
- radv_meta_restore_compute(&saved_state, cmd_buffer, 4);
-}
-
VkResult radv_CreateQueryPool(
VkDevice _device,
const VkQueryPoolCreateInfo* pCreateInfo,
@@ -758,10 +67,12 @@ VkResult radv_CreateQueryPool(
switch(pCreateInfo->queryType) {
case VK_QUERY_TYPE_OCCLUSION:
- pool->stride = 16 * get_max_db(device);
+ /* 16 bytes tmp. buffer as the compute packet writes 64 bits, but
+ * the app. may have 32 bits of space. */
+ pool->stride = 16 * get_max_db(device) + 16;
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- pool->stride = pipelinestat_block_size * 2;
+ pool->stride = 16 * 11;
break;
case VK_QUERY_TYPE_TIMESTAMP:
pool->stride = 8;
@@ -771,12 +82,8 @@ VkResult radv_CreateQueryPool(
}
pool->type = pCreateInfo->queryType;
- pool->pipeline_stats_mask = pCreateInfo->pipelineStatistics;
pool->availability_offset = pool->stride * pCreateInfo->queryCount;
- size = pool->availability_offset;
- if (pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP ||
- pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS)
- size += 4 * pCreateInfo->queryCount;
+ size = pool->availability_offset + 4 * pCreateInfo->queryCount;
pool->bo = device->ws->buffer_create(device->ws, size,
64, RADEON_DOMAIN_GTT, 0);
@@ -824,7 +131,6 @@ VkResult radv_GetQueryPoolResults(
VkDeviceSize stride,
VkQueryResultFlags flags)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
char *data = pData;
VkResult result = VK_SUCCESS;
@@ -835,21 +141,23 @@ VkResult radv_GetQueryPoolResults(
char *src = pool->ptr + query * pool->stride;
uint32_t available;
- if (pool->type != VK_QUERY_TYPE_OCCLUSION) {
- if (flags & VK_QUERY_RESULT_WAIT_BIT)
- while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query))
- ;
- available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);
+ if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query))
+ ;
}
- switch (pool->type) {
- case VK_QUERY_TYPE_TIMESTAMP: {
- if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
- result = VK_NOT_READY;
- break;
+ if (!*(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query) &&
+ !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
+ if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
+ *(uint32_t*)dest = 0;
+ result = VK_NOT_READY;
+ continue;
- }
+ }
+ available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);
+ switch (pool->type) {
+ case VK_QUERY_TYPE_TIMESTAMP:
if (flags & VK_QUERY_RESULT_64_BIT) {
*(uint64_t*)dest = *(uint64_t*)src;
dest += 8;
@@ -858,79 +166,25 @@ VkResult radv_GetQueryPoolResults(
dest += 4;
}
break;
- }
case VK_QUERY_TYPE_OCCLUSION: {
- volatile uint64_t const *src64 = (volatile uint64_t const *)src;
- uint64_t sample_count = 0;
- int db_count = get_max_db(device);
- available = 1;
-
- for (int i = 0; i < db_count; ++i) {
- uint64_t start, end;
- do {
- start = src64[2 * i];
- end = src64[2 * i + 1];
- } while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) && (flags & VK_QUERY_RESULT_WAIT_BIT));
-
- if (!(start & (1ull << 63)) || !(end & (1ull << 63)))
- available = 0;
- else {
- sample_count += end - start;
- }
- }
-
- if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
- result = VK_NOT_READY;
- break;
-
- }
+ uint64_t result = *(uint64_t*)(src + pool->stride - 16);
if (flags & VK_QUERY_RESULT_64_BIT) {
- *(uint64_t*)dest = sample_count;
+ *(uint64_t*)dest = result;
dest += 8;
} else {
- *(uint32_t*)dest = sample_count;
+ *(uint32_t*)dest = result;
dest += 4;
}
break;
- }
- case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
- if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
- result = VK_NOT_READY;
- break;
-
- }
-
- const uint64_t *start = (uint64_t*)src;
- const uint64_t *stop = (uint64_t*)(src + pipelinestat_block_size);
- if (flags & VK_QUERY_RESULT_64_BIT) {
- uint64_t *dst = (uint64_t*)dest;
- dest += util_bitcount(pool->pipeline_stats_mask) * 8;
- for(int i = 0; i < 11; ++i)
- if(pool->pipeline_stats_mask & (1u << i))
- *dst++ = stop[pipeline_statistics_indices[i]] -
- start[pipeline_statistics_indices[i]];
-
- } else {
- uint32_t *dst = (uint32_t*)dest;
- dest += util_bitcount(pool->pipeline_stats_mask) * 4;
- for(int i = 0; i < 11; ++i)
- if(pool->pipeline_stats_mask & (1u << i))
- *dst++ = stop[pipeline_statistics_indices[i]] -
- start[pipeline_statistics_indices[i]];
- }
- break;
- }
default:
unreachable("trying to get results of unhandled query type");
}
+ }
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- if (flags & VK_QUERY_RESULT_64_BIT) {
- *(uint64_t*)dest = available;
- } else {
- *(uint32_t*)dest = available;
- }
+ *(uint32_t*)dest = available;
+ dest += 4;
}
}
@@ -951,7 +205,6 @@ void radv_CmdCopyQueryPoolResults(
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
struct radeon_winsys_cs *cs = cmd_buffer->cs;
- unsigned elem_size = (flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4;
uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
uint64_t dest_va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo);
dest_va += dst_buffer->offset + dstOffset;
@@ -959,89 +212,33 @@ void radv_CmdCopyQueryPoolResults(
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, pool->bo, 8);
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8);
- switch (pool->type) {
- case VK_QUERY_TYPE_OCCLUSION:
- if (flags & VK_QUERY_RESULT_WAIT_BIT) {
- for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
- unsigned query = firstQuery + i;
- uint64_t src_va = va + query * pool->stride + pool->stride - 4;
-
- /* Waits on the upper word of the last DB entry */
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, 5 | WAIT_REG_MEM_MEM_SPACE(1));
- radeon_emit(cs, src_va);
- radeon_emit(cs, src_va >> 32);
- radeon_emit(cs, 0x80000000); /* reference value */
- radeon_emit(cs, 0xffffffff); /* mask */
- radeon_emit(cs, 4); /* poll interval */
- }
- }
- radv_query_shader(cmd_buffer, cmd_buffer->device->meta_state.query.occlusion_query_pipeline,
- pool->bo, dst_buffer->bo, firstQuery * pool->stride,
- dst_buffer->offset + dstOffset,
- get_max_db(cmd_buffer->device) * 16, stride,
- queryCount, flags, 0, 0);
- break;
- case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- if (flags & VK_QUERY_RESULT_WAIT_BIT) {
- for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
- unsigned query = firstQuery + i;
-
- radeon_check_space(cmd_buffer->device->ws, cs, 7);
+ for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
+ unsigned query = firstQuery + i;
+ uint64_t local_src_va = va + query * pool->stride;
+ unsigned elem_size = (flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4;
- uint64_t avail_va = va + pool->availability_offset + 4 * query;
+ unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 26);
- /* This waits on the ME. All copies below are done on the ME */
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
- radeon_emit(cs, avail_va);
- radeon_emit(cs, avail_va >> 32);
- radeon_emit(cs, 1); /* reference value */
- radeon_emit(cs, 0xffffffff); /* mask */
- radeon_emit(cs, 4); /* poll interval */
- }
+ if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ /* TODO, not sure if there is any case where we won't always be ready yet */
+ uint64_t avail_va = va + pool->availability_offset + 4 * query;
+
+
+ /* This waits on the ME. All copies below are done on the ME */
+ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
+ radeon_emit(cs, avail_va);
+ radeon_emit(cs, avail_va >> 32);
+ radeon_emit(cs, 1); /* reference value */
+ radeon_emit(cs, 0xffffffff); /* mask */
+ radeon_emit(cs, 4); /* poll interval */
}
- radv_query_shader(cmd_buffer, cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
- pool->bo, dst_buffer->bo, firstQuery * pool->stride,
- dst_buffer->offset + dstOffset,
- pipelinestat_block_size * 2, stride, queryCount, flags,
- pool->pipeline_stats_mask,
- pool->availability_offset + 4 * firstQuery);
- break;
- case VK_QUERY_TYPE_TIMESTAMP:
- for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
- unsigned query = firstQuery + i;
- uint64_t local_src_va = va + query * pool->stride;
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 19);
+ switch (pool->type) {
+ case VK_QUERY_TYPE_OCCLUSION:
+ local_src_va += pool->stride - 16;
- if (flags & VK_QUERY_RESULT_WAIT_BIT) {
- /* TODO, not sure if there is any case where we won't always be ready yet */
- uint64_t avail_va = va + pool->availability_offset + 4 * query;
-
- /* This waits on the ME. All copies below are done on the ME */
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
- radeon_emit(cs, avail_va);
- radeon_emit(cs, avail_va >> 32);
- radeon_emit(cs, 1); /* reference value */
- radeon_emit(cs, 0xffffffff); /* mask */
- radeon_emit(cs, 4); /* poll interval */
- }
- if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- uint64_t avail_va = va + pool->availability_offset + 4 * query;
- uint64_t avail_dest_va = dest_va + elem_size;
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_MEM));
- radeon_emit(cs, avail_va);
- radeon_emit(cs, avail_va >> 32);
- radeon_emit(cs, avail_dest_va);
- radeon_emit(cs, avail_dest_va >> 32);
- }
-
+ case VK_QUERY_TYPE_TIMESTAMP:
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
COPY_DATA_DST_SEL(COPY_DATA_MEM) |
@@ -1050,13 +247,34 @@ void radv_CmdCopyQueryPoolResults(
radeon_emit(cs, local_src_va >> 32);
radeon_emit(cs, dest_va);
radeon_emit(cs, dest_va >> 32);
+ break;
+ default:
+ unreachable("trying to get results of unhandled query type");
+ }
+ /* The flag could be still changed while the data copy is busy and we
+ * then might have invalid data, but a ready flag. However, the availability
+ * writes happen on the ME too, so they should be synchronized. Might need to
+ * revisit this with multiple queues.
+ */
+ if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+ uint64_t avail_va = va + pool->availability_offset + 4 * query;
+ uint64_t avail_dest_va = dest_va;
+ if (pool->type != VK_QUERY_TYPE_PIPELINE_STATISTICS)
+ avail_dest_va += elem_size;
+ else
+ abort();
- assert(cs->cdw <= cdw_max);
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+ COPY_DATA_DST_SEL(COPY_DATA_MEM));
+ radeon_emit(cs, avail_va);
+ radeon_emit(cs, avail_va >> 32);
+ radeon_emit(cs, avail_dest_va);
+ radeon_emit(cs, avail_dest_va >> 32);
}
- break;
- default:
- unreachable("trying to get results of unhandled query type");
+
+ assert(cs->cdw <= cdw_max);
}
}
@@ -1075,10 +293,8 @@ void radv_CmdResetQueryPool(
si_cp_dma_clear_buffer(cmd_buffer, va + firstQuery * pool->stride,
queryCount * pool->stride, 0);
- if (pool->type == VK_QUERY_TYPE_TIMESTAMP ||
- pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS)
- si_cp_dma_clear_buffer(cmd_buffer, va + pool->availability_offset + firstQuery * 4,
- queryCount * 4, 0);
+ si_cp_dma_clear_buffer(cmd_buffer, va + pool->availability_offset + firstQuery * 4,
+ queryCount * 4, 0);
}
void radv_CmdBeginQuery(
@@ -1108,14 +324,6 @@ void radv_CmdBeginQuery(
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
break;
- case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- radeon_check_space(cmd_buffer->device->ws, cs, 4);
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- break;
default:
unreachable("beginning unhandled query type");
}
@@ -1149,28 +357,26 @@ void radv_CmdEndQuery(
radeon_emit(cs, va + 8);
radeon_emit(cs, (va + 8) >> 32);
- break;
- case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- radeon_check_space(cmd_buffer->device->ws, cs, 10);
-
- va += pipelinestat_block_size;
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
+ radeon_emit(cs, PKT3(PKT3_OCCLUSION_QUERY, 3, 0));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
+ radeon_emit(cs, va + pool->stride - 16);
+ radeon_emit(cs, (va + pool->stride - 16) >> 32);
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
- EVENT_INDEX(5));
- radeon_emit(cs, avail_va);
- radeon_emit(cs, (avail_va >> 32) | EOP_DATA_SEL(1));
- radeon_emit(cs, 1);
- radeon_emit(cs, 0);
break;
default:
unreachable("ending unhandled query type");
}
+
+ radeon_check_space(cmd_buffer->device->ws, cs, 5);
+
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
+ S_370_WR_CONFIRM(1) |
+ S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(cs, avail_va);
+ radeon_emit(cs, avail_va >> 32);
+ radeon_emit(cs, 1);
}
void radv_CmdWriteTimestamp(
@@ -1181,7 +387,6 @@ void radv_CmdWriteTimestamp(
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
struct radeon_winsys_cs *cs = cmd_buffer->cs;
uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
uint64_t avail_va = va + pool->availability_offset + 4 * query;
@@ -1189,27 +394,17 @@ void radv_CmdWriteTimestamp(
cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 5);
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12);
-
- if (mec) {
- radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
- radeon_emit(cs, 3 << 29);
- radeon_emit(cs, query_va);
- radeon_emit(cs, query_va >> 32);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- } else {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
- radeon_emit(cs, query_va);
- radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF));
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- }
+ unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 11);
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
+ radeon_emit(cs, query_va);
+ radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, S_370_DST_SEL(mec ? V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_ME));
radeon_emit(cs, avail_va);
diff --git a/lib/mesa/src/amd/vulkan/radv_radeon_winsys.h b/lib/mesa/src/amd/vulkan/radv_radeon_winsys.h
index f6bab7410..6370f3de7 100644
--- a/lib/mesa/src/amd/vulkan/radv_radeon_winsys.h
+++ b/lib/mesa/src/amd/vulkan/radv_radeon_winsys.h
@@ -47,7 +47,6 @@ enum radeon_bo_flag { /* bitfield */
RADEON_FLAG_GTT_WC = (1 << 0),
RADEON_FLAG_CPU_ACCESS = (1 << 1),
RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
- RADEON_FLAG_VIRTUAL = (1 << 3)
};
enum radeon_bo_usage { /* bitfield */
@@ -86,16 +85,14 @@ struct radeon_info {
uint32_t gart_page_size;
uint64_t gart_size;
uint64_t vram_size;
- uint64_t visible_vram_size;
bool has_dedicated_vram;
bool has_virtual_memory;
bool gfx_ib_pad_with_type2;
+ bool has_sdma;
bool has_uvd;
- uint32_t sdma_rings;
- uint32_t compute_rings;
uint32_t vce_fw_version;
uint32_t vce_harvest_config;
- uint32_t clock_crystal_freq; /* in kHz */
+ uint32_t clock_crystal_freq;
/* Kernel info. */
uint32_t drm_major; /* version */
@@ -149,7 +146,6 @@ struct radeon_info {
#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20)
#define RADEON_SURF_FMASK (1 << 21)
#define RADEON_SURF_DISABLE_DCC (1 << 22)
-#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
@@ -219,10 +215,6 @@ struct radeon_surf {
uint64_t dcc_size;
uint64_t dcc_alignment;
-
- uint64_t htile_size;
- uint64_t htile_slice_size;
- uint64_t htile_alignment;
};
enum radeon_bo_layout {
@@ -259,7 +251,6 @@ struct radeon_bo_metadata {
struct radeon_winsys_bo;
struct radeon_winsys_fence;
-struct radeon_winsys_sem;
struct radeon_winsys {
void (*destroy)(struct radeon_winsys *ws);
@@ -290,15 +281,10 @@ struct radeon_winsys {
void (*buffer_set_metadata)(struct radeon_winsys_bo *bo,
struct radeon_bo_metadata *md);
-
- void (*buffer_virtual_bind)(struct radeon_winsys_bo *parent,
- uint64_t offset, uint64_t size,
- struct radeon_winsys_bo *bo, uint64_t bo_offset);
struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws);
void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
- bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx,
- enum ring_type ring_type, int ring_index);
+ bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx);
struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws,
enum ring_type ring_type);
@@ -312,15 +298,8 @@ struct radeon_winsys {
void (*cs_grow)(struct radeon_winsys_cs * cs, size_t min_size);
int (*cs_submit)(struct radeon_winsys_ctx *ctx,
- int queue_index,
struct radeon_winsys_cs **cs_array,
unsigned cs_count,
- struct radeon_winsys_cs *initial_preamble_cs,
- struct radeon_winsys_cs *continue_preamble_cs,
- struct radeon_winsys_sem **wait_sem,
- unsigned wait_sem_count,
- struct radeon_winsys_sem **signal_sem,
- unsigned signal_sem_count,
bool can_patch,
struct radeon_winsys_fence *fence);
@@ -331,8 +310,6 @@ struct radeon_winsys {
void (*cs_execute_secondary)(struct radeon_winsys_cs *parent,
struct radeon_winsys_cs *child);
- void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id);
-
int (*surface_init)(struct radeon_winsys *ws,
struct radeon_surf *surf);
@@ -345,10 +322,6 @@ struct radeon_winsys {
struct radeon_winsys_fence *fence,
bool absolute,
uint64_t timeout);
-
- struct radeon_winsys_sem *(*create_sem)(struct radeon_winsys *ws);
- void (*destroy_sem)(struct radeon_winsys_sem *sem);
-
};
static inline void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
diff --git a/lib/mesa/src/amd/vulkan/radv_util.c b/lib/mesa/src/amd/vulkan/radv_util.c
index b892eb788..8c7a948bc 100644
--- a/lib/mesa/src/amd/vulkan/radv_util.c
+++ b/lib/mesa/src/amd/vulkan/radv_util.c
@@ -29,7 +29,6 @@
#include <assert.h>
#include "radv_private.h"
-#include "vk_enum_to_str.h"
#include "util/u_math.h"
@@ -66,13 +65,55 @@ void radv_printflike(3, 4)
fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer);
}
+void radv_noreturn radv_printflike(1, 2)
+ radv_abortf(const char *format, ...)
+{
+ va_list va;
+
+ va_start(va, format);
+ radv_abortfv(format, va);
+ va_end(va);
+}
+
+void radv_noreturn
+radv_abortfv(const char *format, va_list va)
+{
+ fprintf(stderr, "vk: error: ");
+ vfprintf(stderr, format, va);
+ fprintf(stderr, "\n");
+ abort();
+}
+
VkResult
__vk_errorf(VkResult error, const char *file, int line, const char *format, ...)
{
va_list ap;
char buffer[256];
- const char *error_str = vk_Result_to_str(error);
+#define ERROR_CASE(error) case error: error_str = #error; break;
+
+ const char *error_str;
+ switch ((int32_t)error) {
+
+ /* Core errors */
+ ERROR_CASE(VK_ERROR_OUT_OF_HOST_MEMORY)
+ ERROR_CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY)
+ ERROR_CASE(VK_ERROR_INITIALIZATION_FAILED)
+ ERROR_CASE(VK_ERROR_DEVICE_LOST)
+ ERROR_CASE(VK_ERROR_MEMORY_MAP_FAILED)
+ ERROR_CASE(VK_ERROR_LAYER_NOT_PRESENT)
+ ERROR_CASE(VK_ERROR_EXTENSION_NOT_PRESENT)
+ ERROR_CASE(VK_ERROR_INCOMPATIBLE_DRIVER)
+
+ /* Extension errors */
+ ERROR_CASE(VK_ERROR_OUT_OF_DATE_KHR)
+
+ default:
+ assert(!"Unknown error");
+ error_str = "unknown error";
+ }
+
+#undef ERROR_CASE
if (format) {
va_start(ap, format);
diff --git a/lib/mesa/src/amd/vulkan/radv_wsi.c b/lib/mesa/src/amd/vulkan/radv_wsi.c
index 3a8617fd8..1f1ab1c80 100644
--- a/lib/mesa/src/amd/vulkan/radv_wsi.c
+++ b/lib/mesa/src/amd/vulkan/radv_wsi.c
@@ -24,9 +24,7 @@
*/
#include "radv_private.h"
-#include "radv_meta.h"
#include "wsi_common.h"
-#include "util/vk_util.h"
static const struct wsi_callbacks wsi_cbs = {
.get_phys_device_format_properties = radv_GetPhysicalDeviceFormatProperties,
@@ -77,7 +75,7 @@ void radv_DestroySurfaceKHR(
const VkAllocationCallbacks* pAllocator)
{
RADV_FROM_HANDLE(radv_instance, instance, _instance);
- ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
+ RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
vk_free2(&instance->alloc, pAllocator, surface);
}
@@ -89,12 +87,12 @@ VkResult radv_GetPhysicalDeviceSurfaceSupportKHR(
VkBool32* pSupported)
{
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
+ RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];
return iface->get_support(surface, &device->wsi_device,
&device->instance->alloc,
- queueFamilyIndex, device->local_fd, true, pSupported);
+ queueFamilyIndex, pSupported);
}
VkResult radv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
@@ -103,7 +101,7 @@ VkResult radv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
VkSurfaceCapabilitiesKHR* pSurfaceCapabilities)
{
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
+ RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];
return iface->get_capabilities(surface, pSurfaceCapabilities);
@@ -116,7 +114,7 @@ VkResult radv_GetPhysicalDeviceSurfaceFormatsKHR(
VkSurfaceFormatKHR* pSurfaceFormats)
{
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
+ RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];
return iface->get_formats(surface, &device->wsi_device, pSurfaceFormatCount,
@@ -130,7 +128,7 @@ VkResult radv_GetPhysicalDeviceSurfacePresentModesKHR(
VkPresentModeKHR* pPresentModes)
{
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
+ RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];
return iface->get_present_modes(surface, pPresentModeCount,
@@ -141,18 +139,18 @@ static VkResult
radv_wsi_image_create(VkDevice device_h,
const VkSwapchainCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks* pAllocator,
- bool needs_linear_copy,
- bool linear,
VkImage *image_p,
VkDeviceMemory *memory_p,
uint32_t *size,
uint32_t *offset,
uint32_t *row_pitch, int *fd_p)
{
+ struct radv_device *device = radv_device_from_handle(device_h);
VkResult result = VK_SUCCESS;
struct radeon_surf *surface;
VkImage image_h;
struct radv_image *image;
+ bool bret;
int fd;
result = radv_image_create(device_h,
@@ -171,7 +169,7 @@ radv_wsi_image_create(VkDevice device_h,
.arrayLayers = 1,
.samples = 1,
/* FIXME: Need a way to use X tiling to allow scanout */
- .tiling = linear ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.flags = 0,
},
@@ -184,44 +182,37 @@ radv_wsi_image_create(VkDevice device_h,
image = radv_image_from_handle(image_h);
VkDeviceMemory memory_h;
-
- const VkDedicatedAllocationMemoryAllocateInfoNV ded_alloc = {
- .sType = VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV,
- .pNext = NULL,
- .buffer = VK_NULL_HANDLE,
- .image = image_h
- };
-
+ struct radv_device_memory *memory;
result = radv_AllocateMemory(device_h,
&(VkMemoryAllocateInfo) {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .pNext = &ded_alloc,
- .allocationSize = image->size,
- .memoryTypeIndex = linear ? 1 : 0,
- },
+ .allocationSize = image->size,
+ .memoryTypeIndex = 0,
+ },
NULL /* XXX: pAllocator */,
&memory_h);
if (result != VK_SUCCESS)
goto fail_create_image;
- radv_BindImageMemory(device_h, image_h, memory_h, 0);
-
- /*
- * return the fd for the image in the no copy mode,
- * or the fd for the linear image if a copy is required.
- */
- if (!needs_linear_copy || (needs_linear_copy && linear)) {
- RADV_FROM_HANDLE(radv_device, device, device_h);
- RADV_FROM_HANDLE(radv_device_memory, memory, memory_h);
- if (!radv_get_memory_fd(device, memory, &fd))
- goto fail_alloc_memory;
- *fd_p = fd;
- }
+ memory = radv_device_memory_from_handle(memory_h);
+
+ radv_BindImageMemory(VK_NULL_HANDLE, image_h, memory_h, 0);
+
+ bret = device->ws->buffer_get_fd(device->ws,
+ memory->bo, &fd);
+ if (bret == false)
+ goto fail_alloc_memory;
+ {
+ struct radeon_bo_metadata metadata;
+ radv_init_metadata(device, image, &metadata);
+ device->ws->buffer_set_metadata(memory->bo, &metadata);
+ }
surface = &image->surface;
*image_p = image_h;
*memory_p = memory_h;
+ *fd_p = fd;
*size = image->size;
*offset = image->offset;
*row_pitch = surface->level[0].pitch_bytes;
@@ -251,94 +242,6 @@ static const struct wsi_image_fns radv_wsi_image_fns = {
.free_wsi_image = radv_wsi_image_free,
};
-#define NUM_PRIME_POOLS RADV_QUEUE_TRANSFER
-static void
-radv_wsi_free_prime_command_buffers(struct radv_device *device,
- struct wsi_swapchain *swapchain)
-{
- const int num_pools = NUM_PRIME_POOLS;
- const int num_images = swapchain->image_count;
- int i;
- for (i = 0; i < num_pools; i++) {
- radv_FreeCommandBuffers(radv_device_to_handle(device),
- swapchain->cmd_pools[i],
- swapchain->image_count,
- &swapchain->cmd_buffers[i * num_images]);
-
- radv_DestroyCommandPool(radv_device_to_handle(device),
- swapchain->cmd_pools[i],
- &swapchain->alloc);
- }
-}
-
-static VkResult
-radv_wsi_create_prime_command_buffers(struct radv_device *device,
- const VkAllocationCallbacks *alloc,
- struct wsi_swapchain *swapchain)
-{
- const int num_pools = NUM_PRIME_POOLS;
- const int num_images = swapchain->image_count;
- int num_cmd_buffers = num_images * num_pools; //TODO bump to MAX_QUEUE_FAMILIES
- VkResult result;
- int i, j;
-
- swapchain->cmd_buffers = vk_alloc(alloc, (sizeof(VkCommandBuffer) * num_cmd_buffers), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
- if (!swapchain->cmd_buffers)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- memset(swapchain->cmd_buffers, 0, sizeof(VkCommandBuffer) * num_cmd_buffers);
- memset(swapchain->cmd_pools, 0, sizeof(VkCommandPool) * num_pools);
- for (i = 0; i < num_pools; i++) {
- VkCommandPoolCreateInfo pool_create_info;
-
- pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
- pool_create_info.pNext = NULL;
- pool_create_info.flags = 0;
- pool_create_info.queueFamilyIndex = i;
-
- result = radv_CreateCommandPool(radv_device_to_handle(device),
- &pool_create_info, alloc,
- &swapchain->cmd_pools[i]);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkCommandBufferAllocateInfo cmd_buffer_info;
- cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
- cmd_buffer_info.pNext = NULL;
- cmd_buffer_info.commandPool = swapchain->cmd_pools[i];
- cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
- cmd_buffer_info.commandBufferCount = num_images;
-
- result = radv_AllocateCommandBuffers(radv_device_to_handle(device),
- &cmd_buffer_info,
- &swapchain->cmd_buffers[i * num_images]);
- if (result != VK_SUCCESS)
- goto fail;
- for (j = 0; j < num_images; j++) {
- VkImage image, linear_image;
- int idx = (i * num_images) + j;
-
- swapchain->get_image_and_linear(swapchain, j, &image, &linear_image);
- VkCommandBufferBeginInfo begin_info = {0};
-
- begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
-
- radv_BeginCommandBuffer(swapchain->cmd_buffers[idx], &begin_info);
-
- radv_blit_to_prime_linear(radv_cmd_buffer_from_handle(swapchain->cmd_buffers[idx]),
- radv_image_from_handle(image),
- radv_image_from_handle(linear_image));
-
- radv_EndCommandBuffer(swapchain->cmd_buffers[idx]);
- }
- }
- return VK_SUCCESS;
-fail:
- radv_wsi_free_prime_command_buffers(device, swapchain);
- return result;
-}
-
VkResult radv_CreateSwapchainKHR(
VkDevice _device,
const VkSwapchainCreateInfoKHR* pCreateInfo,
@@ -346,9 +249,9 @@ VkResult radv_CreateSwapchainKHR(
VkSwapchainKHR* pSwapchain)
{
RADV_FROM_HANDLE(radv_device, device, _device);
- ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, pCreateInfo->surface);
+ RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface);
struct wsi_interface *iface =
- device->physical_device->wsi_device.wsi[surface->platform];
+ device->instance->physicalDevice.wsi_device.wsi[surface->platform];
struct wsi_swapchain *swapchain;
const VkAllocationCallbacks *alloc;
if (pAllocator)
@@ -356,8 +259,7 @@ VkResult radv_CreateSwapchainKHR(
else
alloc = &device->alloc;
VkResult result = iface->create_swapchain(surface, _device,
- &device->physical_device->wsi_device,
- device->physical_device->local_fd,
+ &device->instance->physicalDevice.wsi_device,
pCreateInfo,
alloc, &radv_wsi_image_fns,
&swapchain);
@@ -372,13 +274,6 @@ VkResult radv_CreateSwapchainKHR(
for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++)
swapchain->fences[i] = VK_NULL_HANDLE;
- if (swapchain->needs_linear_copy) {
- result = radv_wsi_create_prime_command_buffers(device, alloc,
- swapchain);
- if (result != VK_SUCCESS)
- return result;
- }
-
*pSwapchain = wsi_swapchain_to_handle(swapchain);
return VK_SUCCESS;
@@ -406,9 +301,6 @@ void radv_DestroySwapchainKHR(
radv_DestroyFence(_device, swapchain->fences[i], pAllocator);
}
- if (swapchain->needs_linear_copy)
- radv_wsi_free_prime_command_buffers(device, swapchain);
-
swapchain->destroy(swapchain, alloc);
}
@@ -453,59 +345,30 @@ VkResult radv_QueuePresentKHR(
RADV_FROM_HANDLE(radv_queue, queue, _queue);
VkResult result = VK_SUCCESS;
- const VkPresentRegionsKHR *regions =
- vk_find_struct_const(pPresentInfo->pNext, PRESENT_REGIONS_KHR);
-
for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
RADV_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]);
- struct radeon_winsys_cs *cs;
- const VkPresentRegionKHR *region = NULL;
- VkResult item_result;
assert(radv_device_from_handle(swapchain->device) == queue->device);
if (swapchain->fences[0] == VK_NULL_HANDLE) {
- item_result = radv_CreateFence(radv_device_to_handle(queue->device),
+ result = radv_CreateFence(radv_device_to_handle(queue->device),
&(VkFenceCreateInfo) {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.flags = 0,
}, &swapchain->alloc, &swapchain->fences[0]);
- if (pPresentInfo->pResults != NULL)
- pPresentInfo->pResults[i] = item_result;
- result = result == VK_SUCCESS ? item_result : result;
- if (item_result != VK_SUCCESS)
- continue;
+ if (result != VK_SUCCESS)
+ return result;
} else {
radv_ResetFences(radv_device_to_handle(queue->device),
1, &swapchain->fences[0]);
}
- if (swapchain->needs_linear_copy) {
- int idx = (queue->queue_family_index * swapchain->image_count) + pPresentInfo->pImageIndices[i];
- cs = radv_cmd_buffer_from_handle(swapchain->cmd_buffers[idx])->cs;
- } else
- cs = queue->device->empty_cs[queue->queue_family_index];
- RADV_FROM_HANDLE(radv_fence, fence, swapchain->fences[0]);
- struct radeon_winsys_fence *base_fence = fence->fence;
- struct radeon_winsys_ctx *ctx = queue->hw_ctx;
- queue->device->ws->cs_submit(ctx, queue->queue_idx,
- &cs,
- 1, NULL, NULL,
- (struct radeon_winsys_sem **)pPresentInfo->pWaitSemaphores,
- pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence);
- fence->submitted = true;
-
- if (regions && regions->pRegions)
- region = &regions->pRegions[i];
+ radv_QueueSubmit(_queue, 0, NULL, swapchain->fences[0]);
- item_result = swapchain->queue_present(swapchain,
- pPresentInfo->pImageIndices[i],
- region);
+ result = swapchain->queue_present(swapchain,
+ pPresentInfo->pImageIndices[i]);
/* TODO: What if one of them returns OUT_OF_DATE? */
- if (pPresentInfo->pResults != NULL)
- pPresentInfo->pResults[i] = item_result;
- result = result == VK_SUCCESS ? item_result : result;
- if (item_result != VK_SUCCESS)
- continue;
+ if (result != VK_SUCCESS)
+ return result;
VkFence last = swapchain->fences[2];
swapchain->fences[2] = swapchain->fences[1];
diff --git a/lib/mesa/src/amd/vulkan/radv_wsi_wayland.c b/lib/mesa/src/amd/vulkan/radv_wsi_wayland.c
index d9a4c72d6..c6a9667d9 100644
--- a/lib/mesa/src/amd/vulkan/radv_wsi_wayland.c
+++ b/lib/mesa/src/amd/vulkan/radv_wsi_wayland.c
@@ -23,6 +23,9 @@
* IN THE SOFTWARE.
*/
+#include <wayland-client.h>
+#include <wayland-drm-client-protocol.h>
+
#include "wsi_common_wayland.h"
#include "radv_private.h"
diff --git a/lib/mesa/src/amd/vulkan/radv_wsi_x11.c b/lib/mesa/src/amd/vulkan/radv_wsi_x11.c
index c65ac9387..946b99095 100644
--- a/lib/mesa/src/amd/vulkan/radv_wsi_x11.c
+++ b/lib/mesa/src/amd/vulkan/radv_wsi_x11.c
@@ -45,9 +45,7 @@ VkBool32 radv_GetPhysicalDeviceXcbPresentationSupportKHR(
return wsi_get_physical_device_xcb_presentation_support(
&device->wsi_device,
&device->instance->alloc,
- queueFamilyIndex,
- device->local_fd, true,
- connection, visual_id);
+ queueFamilyIndex, connection, visual_id);
}
VkBool32 radv_GetPhysicalDeviceXlibPresentationSupportKHR(
@@ -61,9 +59,7 @@ VkBool32 radv_GetPhysicalDeviceXlibPresentationSupportKHR(
return wsi_get_physical_device_xcb_presentation_support(
&device->wsi_device,
&device->instance->alloc,
- queueFamilyIndex,
- device->local_fd, true,
- XGetXCBConnection(dpy), visualID);
+ queueFamilyIndex, XGetXCBConnection(dpy), visualID);
}
VkResult radv_CreateXcbSurfaceKHR(
diff --git a/lib/mesa/src/amd/vulkan/si_cmd_buffer.c b/lib/mesa/src/amd/vulkan/si_cmd_buffer.c
index 8d7db9644..a61a950de 100644
--- a/lib/mesa/src/amd/vulkan/si_cmd_buffer.c
+++ b/lib/mesa/src/amd/vulkan/si_cmd_buffer.c
@@ -171,7 +171,7 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device,
}
static void
-si_emit_compute(struct radv_physical_device *physical_device,
+si_init_compute(struct radv_physical_device *physical_device,
struct radeon_winsys_cs *cs)
{
radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
@@ -209,22 +209,15 @@ si_emit_compute(struct radv_physical_device *physical_device,
}
}
-void
-si_init_compute(struct radv_cmd_buffer *cmd_buffer)
-{
- struct radv_physical_device *physical_device = cmd_buffer->device->physical_device;
- si_emit_compute(physical_device, cmd_buffer->cs);
-}
-static void
-si_emit_config(struct radv_physical_device *physical_device,
- struct radeon_winsys_cs *cs)
+void si_init_config(struct radv_physical_device *physical_device,
+ struct radv_cmd_buffer *cmd_buffer)
{
unsigned num_rb = MIN2(physical_device->rad_info.num_render_backends, 16);
unsigned rb_mask = physical_device->rad_info.enabled_rb_mask;
unsigned raster_config, raster_config_1;
int i;
-
+ struct radeon_winsys_cs *cs = cmd_buffer->cs;
radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
radeon_emit(cs, CONTEXT_CONTROL_LOAD_ENABLE(1));
radeon_emit(cs, CONTEXT_CONTROL_SHADOW_ENABLE(1));
@@ -297,7 +290,6 @@ si_emit_config(struct radv_physical_device *physical_device,
raster_config_1 = 0x0000002a;
break;
case CHIP_POLARIS11:
- case CHIP_POLARIS12:
raster_config = 0x16000012;
raster_config_1 = 0x00000000;
break;
@@ -362,6 +354,11 @@ si_emit_config(struct radv_physical_device *physical_device,
radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0);
+ radeon_set_context_reg(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0));
+ radeon_set_context_reg(cs, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
+ radeon_set_context_reg(cs, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
+ radeon_set_context_reg(cs, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));
+
radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
@@ -374,15 +371,6 @@ si_emit_config(struct radv_physical_device *physical_device,
radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
if (physical_device->rad_info.chip_class >= CIK) {
- /* If this is 0, Bonaire can hang even if GS isn't being used.
- * Other chips are unaffected. These are suboptimal values,
- * but we don't use on-chip GS.
- */
- radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL,
- S_028A44_ES_VERTS_PER_SUBGRP(64) |
- S_028A44_GS_PRIMS_PER_SUBGRP(4));
-
- radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
@@ -395,6 +383,7 @@ si_emit_config(struct radv_physical_device *physical_device,
*
* LATE_ALLOC_VS = 2 is the highest safe number.
*/
+ radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
} else {
@@ -403,6 +392,7 @@ si_emit_config(struct radv_physical_device *physical_device,
* - VS can't execute on CU0.
* - If HS writes outputs to LDS, LS can't execute on CU0.
*/
+ radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe));
radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
}
@@ -411,25 +401,16 @@ si_emit_config(struct radv_physical_device *physical_device,
}
if (physical_device->rad_info.chip_class >= VI) {
- uint32_t vgt_tess_distribution;
radeon_set_context_reg(cs, R_028424_CB_DCC_CONTROL,
S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
S_028424_OVERWRITE_COMBINER_WATERMARK(4));
- if (physical_device->rad_info.family < CHIP_POLARIS10)
- radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
+ radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
-
- vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) |
- S_028B50_ACCUM_TRI(11) |
- S_028B50_ACCUM_QUAD(11) |
- S_028B50_DONUT_SPLIT(16);
-
- if (physical_device->rad_info.family == CHIP_FIJI ||
- physical_device->rad_info.family >= CHIP_POLARIS10)
- vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
-
radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION,
- vgt_tess_distribution);
+ S_028B50_ACCUM_ISOLINE(32) |
+ S_028B50_ACCUM_TRI(11) |
+ S_028B50_ACCUM_QUAD(11) |
+ S_028B50_DONUT_SPLIT(16));
} else {
radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
@@ -438,51 +419,7 @@ si_emit_config(struct radv_physical_device *physical_device,
if (physical_device->rad_info.family == CHIP_STONEY)
radeon_set_context_reg(cs, R_028C40_PA_SC_SHADER_CONTROL, 0);
- si_emit_compute(physical_device, cs);
-}
-
-void si_init_config(struct radv_cmd_buffer *cmd_buffer)
-{
- struct radv_physical_device *physical_device = cmd_buffer->device->physical_device;
-
- si_emit_config(physical_device, cmd_buffer->cs);
-}
-
-void
-cik_create_gfx_config(struct radv_device *device)
-{
- struct radeon_winsys_cs *cs = device->ws->cs_create(device->ws, RING_GFX);
- if (!cs)
- return;
-
- si_emit_config(device->physical_device, cs);
-
- while (cs->cdw & 7) {
- if (device->physical_device->rad_info.gfx_ib_pad_with_type2)
- radeon_emit(cs, 0x80000000);
- else
- radeon_emit(cs, 0xffff1000);
- }
-
- device->gfx_init = device->ws->buffer_create(device->ws,
- cs->cdw * 4, 4096,
- RADEON_DOMAIN_GTT,
- RADEON_FLAG_CPU_ACCESS);
- if (!device->gfx_init)
- goto fail;
-
- void *map = device->ws->buffer_map(device->gfx_init);
- if (!map) {
- device->ws->buffer_destroy(device->gfx_init);
- device->gfx_init = NULL;
- goto fail;
- }
- memcpy(map, cs->buf, cs->cdw * 4);
-
- device->ws->buffer_unmap(device->gfx_init);
- device->gfx_init_size_dw = cs->cdw;
-fail:
- device->ws->cs_destroy(cs);
+ si_init_compute(physical_device, cs);
}
static void
@@ -511,7 +448,21 @@ si_write_viewport(struct radeon_winsys_cs *cs, int first_vp,
{
int i;
- assert(count);
+ if (count == 0) {
+ radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
+ radeon_emit(cs, fui(1.0));
+ radeon_emit(cs, fui(0.0));
+ radeon_emit(cs, fui(1.0));
+ radeon_emit(cs, fui(0.0));
+ radeon_emit(cs, fui(1.0));
+ radeon_emit(cs, fui(0.0));
+
+ radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
+ radeon_emit(cs, fui(0.0));
+ radeon_emit(cs, fui(1.0));
+
+ return;
+ }
radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
first_vp * 4 * 6, count * 6);
@@ -528,110 +479,39 @@ si_write_viewport(struct radeon_winsys_cs *cs, int first_vp,
radeon_emit(cs, fui(translate[2]));
}
- radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
- first_vp * 4 * 2, count * 2);
for (i = 0; i < count; i++) {
float zmin = MIN2(viewports[i].minDepth, viewports[i].maxDepth);
float zmax = MAX2(viewports[i].minDepth, viewports[i].maxDepth);
+ radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
+ first_vp * 4 * 2, count * 2);
radeon_emit(cs, fui(zmin));
radeon_emit(cs, fui(zmax));
}
}
-static VkRect2D si_scissor_from_viewport(const VkViewport *viewport)
-{
- float scale[3], translate[3];
- VkRect2D rect;
-
- get_viewport_xform(viewport, scale, translate);
-
- rect.offset.x = translate[0] - abs(scale[0]);
- rect.offset.y = translate[1] - abs(scale[1]);
- rect.extent.width = ceilf(translate[0] + abs(scale[0])) - rect.offset.x;
- rect.extent.height = ceilf(translate[1] + abs(scale[1])) - rect.offset.y;
-
- return rect;
-}
-
-static VkRect2D si_intersect_scissor(const VkRect2D *a, const VkRect2D *b) {
- VkRect2D ret;
- ret.offset.x = MAX2(a->offset.x, b->offset.x);
- ret.offset.y = MAX2(a->offset.y, b->offset.y);
- ret.extent.width = MIN2(a->offset.x + a->extent.width,
- b->offset.x + b->extent.width) - ret.offset.x;
- ret.extent.height = MIN2(a->offset.y + a->extent.height,
- b->offset.y + b->extent.height) - ret.offset.y;
- return ret;
-}
-
void
si_write_scissors(struct radeon_winsys_cs *cs, int first,
- int count, const VkRect2D *scissors,
- const VkViewport *viewports, bool can_use_guardband)
+ int count, const VkRect2D *scissors)
{
int i;
- float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY;
- const float max_range = 32767.0f;
- assert(count);
+ if (count == 0)
+ return;
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + first * 4 * 2, count * 2);
for (i = 0; i < count; i++) {
- VkRect2D viewport_scissor = si_scissor_from_viewport(viewports + i);
- VkRect2D scissor = si_intersect_scissor(&scissors[i], &viewport_scissor);
-
- get_viewport_xform(viewports + i, scale, translate);
- scale[0] = abs(scale[0]);
- scale[1] = abs(scale[1]);
-
- if (scale[0] < 0.5)
- scale[0] = 0.5;
- if (scale[1] < 0.5)
- scale[1] = 0.5;
-
- guardband_x = MIN2(guardband_x, (max_range - abs(translate[0])) / scale[0]);
- guardband_y = MIN2(guardband_y, (max_range - abs(translate[1])) / scale[1]);
-
- radeon_emit(cs, S_028250_TL_X(scissor.offset.x) |
- S_028250_TL_Y(scissor.offset.y) |
+ radeon_emit(cs, S_028250_TL_X(scissors[i].offset.x) |
+ S_028250_TL_Y(scissors[i].offset.y) |
S_028250_WINDOW_OFFSET_DISABLE(1));
- radeon_emit(cs, S_028254_BR_X(scissor.offset.x + scissor.extent.width) |
- S_028254_BR_Y(scissor.offset.y + scissor.extent.height));
+ radeon_emit(cs, S_028254_BR_X(scissors[i].offset.x + scissors[i].extent.width) |
+ S_028254_BR_Y(scissors[i].offset.y + scissors[i].extent.height));
}
- if (!can_use_guardband) {
- guardband_x = 1.0;
- guardband_y = 1.0;
- }
-
- radeon_set_context_reg_seq(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
- radeon_emit(cs, fui(guardband_y));
- radeon_emit(cs, fui(1.0));
- radeon_emit(cs, fui(guardband_x));
- radeon_emit(cs, fui(1.0));
-}
-
-static inline unsigned
-radv_prims_for_vertices(struct radv_prim_vertex_count *info, unsigned num)
-{
- if (num == 0)
- return 0;
-
- if (info->incr == 0)
- return 0;
-
- if (num < info->min)
- return 0;
-
- return 1 + ((num - info->min) / info->incr);
}
uint32_t
-si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
- bool instanced_draw, bool indirect_draw,
- uint32_t draw_vertex_count)
+si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer)
{
- enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
- enum radeon_family family = cmd_buffer->device->physical_device->rad_info.family;
- struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
+ enum chip_class chip_class = cmd_buffer->device->instance->physicalDevice.rad_info.chip_class;
+ struct radeon_info *info = &cmd_buffer->device->instance->physicalDevice.rad_info;
unsigned prim = cmd_buffer->state.pipeline->graphics.prim;
unsigned primgroup_size = 128; /* recommended without a GS */
unsigned max_primgroup_in_wave = 2;
@@ -641,45 +521,11 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
bool ia_switch_on_eoi = false;
bool partial_vs_wave = false;
bool partial_es_wave = false;
- uint32_t num_prims = radv_prims_for_vertices(&cmd_buffer->state.pipeline->graphics.prim_vertex_count, draw_vertex_count);
- bool multi_instances_smaller_than_primgroup;
-
- if (radv_pipeline_has_tess(cmd_buffer->state.pipeline))
- primgroup_size = cmd_buffer->state.pipeline->graphics.tess.num_patches;
- else if (radv_pipeline_has_gs(cmd_buffer->state.pipeline))
- primgroup_size = 64; /* recommended with a GS */
-
- multi_instances_smaller_than_primgroup = indirect_draw || (instanced_draw &&
- num_prims < primgroup_size);
- if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) {
- /* SWITCH_ON_EOI must be set if PrimID is used. */
- if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.uses_prim_id ||
- cmd_buffer->state.pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.uses_prim_id)
- ia_switch_on_eoi = true;
- /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
- if ((family == CHIP_TAHITI ||
- family == CHIP_PITCAIRN ||
- family == CHIP_BONAIRE) &&
- radv_pipeline_has_gs(cmd_buffer->state.pipeline))
- partial_vs_wave = true;
+ /* TODO GS */
+
+ /* TODO TES */
- /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
- if (cmd_buffer->device->has_distributed_tess) {
- if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
- partial_es_wave = true;
-
- if (family == CHIP_TONGA ||
- family == CHIP_FIJI ||
- family == CHIP_POLARIS10 ||
- family == CHIP_POLARIS11 ||
- family == CHIP_POLARIS12)
- partial_vs_wave = true;
- } else {
- partial_vs_wave = true;
- }
- }
- }
/* TODO linestipple */
if (chip_class >= CIK) {
@@ -690,47 +536,32 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
prim == V_008958_DI_PT_POLYGON ||
prim == V_008958_DI_PT_LINELOOP ||
prim == V_008958_DI_PT_TRIFAN ||
- prim == V_008958_DI_PT_TRISTRIP_ADJ ||
- (cmd_buffer->state.pipeline->graphics.prim_restart_enable &&
- (family < CHIP_POLARIS10 ||
- (prim != V_008958_DI_PT_POINTLIST &&
- prim != V_008958_DI_PT_LINESTRIP &&
- prim != V_008958_DI_PT_TRISTRIP))))
- wd_switch_on_eop = true;
-
- /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0.
- * We don't know that for indirect drawing, so treat it as
- * always problematic. */
- if (family == CHIP_HAWAII &&
- (instanced_draw || indirect_draw))
+ prim == V_008958_DI_PT_TRISTRIP_ADJ)
+ // info->primitive_restart ||
+ // info->count_from_stream_output)
wd_switch_on_eop = true;
- /* Performance recommendation for 4 SE Gfx7-8 parts if
- * instances are smaller than a primgroup.
- * Assume indirect draws always use small instances.
- * This is needed for good VS wave utilization.
- */
- if (chip_class <= VI &&
- info->max_se == 4 &&
- multi_instances_smaller_than_primgroup)
- wd_switch_on_eop = true;
+ /* TODO HAWAII */
/* Required on CIK and later. */
if (info->max_se > 2 && !wd_switch_on_eop)
ia_switch_on_eoi = true;
/* Required by Hawaii and, for some special cases, by VI. */
+#if 0
if (ia_switch_on_eoi &&
- (family == CHIP_HAWAII ||
- (chip_class == VI &&
- (radv_pipeline_has_gs(cmd_buffer->state.pipeline) || max_primgroup_in_wave != 2))))
+ (sctx->b.family == CHIP_HAWAII ||
+ (sctx->b.chip_class == VI &&
+ (sctx->gs_shader.cso || max_primgroup_in_wave != 2))))
partial_vs_wave = true;
+#endif
+#if 0
/* Instancing bug on Bonaire. */
- if (family == CHIP_BONAIRE && ia_switch_on_eoi &&
- (instanced_draw || indirect_draw))
+ if (sctx->b.family == CHIP_BONAIRE && ia_switch_on_eoi &&
+ (info->indirect || info->instance_count > 1))
partial_vs_wave = true;
-
+#endif
/* If the WD switch is false, the IA switch must be false too. */
assert(wd_switch_on_eop || !ia_switch_on_eop);
}
@@ -738,19 +569,21 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
if (ia_switch_on_eoi)
partial_es_wave = true;
- if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
- /* GS requirement. */
- if (SI_GS_PER_ES / primgroup_size >= cmd_buffer->device->gs_table_depth - 3)
- partial_es_wave = true;
-
- /* Hw bug with single-primitive instances and SWITCH_ON_EOI
- * on multi-SE chips. */
- if (info->max_se >= 2 && ia_switch_on_eoi &&
- ((instanced_draw || indirect_draw) &&
- num_prims <= 1))
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
- }
-
+ /* GS requirement. */
+#if 0
+ if (SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3)
+ partial_es_wave = true;
+#endif
+
+ /* Hw bug with single-primitive instances and SWITCH_ON_EOI
+ * on multi-SE chips. */
+#if 0
+ if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi &&
+ (info->indirect ||
+ (info->instance_count > 1 &&
+ si_num_prims_for_vertices(info) <= 1)))
+ sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
+#endif
return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
@@ -762,44 +595,27 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
}
-static void
-si_emit_acquire_mem(struct radeon_winsys_cs *cs,
- bool is_mec,
- unsigned cp_coher_cntl)
-{
- if (is_mec) {
- radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
- radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
- radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */
- radeon_emit(cs, 0); /* CP_COHER_BASE */
- radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
- radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
- } else {
- /* ACQUIRE_MEM is only required on a compute ring. */
- radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
- radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
- radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
- radeon_emit(cs, 0); /* CP_COHER_BASE */
- radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
- }
-}
-
void
-si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
- enum chip_class chip_class,
- bool is_mec,
- enum radv_cmd_flush_bits flush_bits)
+si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
{
+ enum chip_class chip_class = cmd_buffer->device->instance->physicalDevice.rad_info.chip_class;
unsigned cp_coher_cntl = 0;
- if (flush_bits & RADV_CMD_FLAG_INV_ICACHE)
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128);
+
+ if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_ICACHE)
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
- if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
+ if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
+ if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_VMEM_L1)
+ cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
+ if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) {
+ cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
+ if (chip_class >= VI)
+ cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
+ }
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
+ if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
S_0085F0_CB0_DEST_BASE_ENA(1) |
S_0085F0_CB1_DEST_BASE_ENA(1) |
@@ -811,112 +627,74 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
S_0085F0_CB7_DEST_BASE_ENA(1);
/* Necessary for DCC */
- if (chip_class >= VI) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) |
+ if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= VI) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
+ radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) |
EVENT_INDEX(5));
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
+ radeon_emit(cmd_buffer->cs, 0);
+ radeon_emit(cmd_buffer->cs, 0);
+ radeon_emit(cmd_buffer->cs, 0);
+ radeon_emit(cmd_buffer->cs, 0);
}
}
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
+ if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
S_0085F0_DB_DEST_BASE_ENA(1);
}
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
+ if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
}
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
+ if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
}
- if (!(flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+ if (!(cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_DB))) {
- if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
- } else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+ if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+ } else if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}
}
- if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+ if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}
/* VGT state sync */
- if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
+ if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
}
/* Make sure ME is idle (it executes most packets) before continuing.
* This prevents read-after-write hazards between PFP and ME.
*/
- if ((cp_coher_cntl || (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) &&
- !is_mec) {
- radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
- radeon_emit(cs, 0);
+ if (cp_coher_cntl || (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+ radeon_emit(cmd_buffer->cs, 0);
}
- if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
- (chip_class <= CIK && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
- cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
- if (chip_class >= VI)
- cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
- } else if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
- cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1) |
- S_0301F0_TC_NC_ACTION_ENA(1);
-
- /* L2 writeback doesn't combine with L1 invalidate */
- si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
-
- cp_coher_cntl = 0;
- }
-
- if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1)
- cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
-
/* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle.
* Therefore, it should be last. Done in PFP.
*/
- if (cp_coher_cntl)
- si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
-}
-
-void
-si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
-{
- bool is_compute = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
-
- if (is_compute)
- cmd_buffer->state.flush_bits &= ~(RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_VS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_VGT_FLUSH);
-
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128);
-
- si_cs_emit_cache_flush(cmd_buffer->cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- radv_cmd_buffer_uses_mec(cmd_buffer),
- cmd_buffer->state.flush_bits);
-
+ if (cp_coher_cntl) {
+ /* ACQUIRE_MEM is only required on a compute ring. */
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
+ radeon_emit(cmd_buffer->cs, cp_coher_cntl); /* CP_COHER_CNTL */
+ radeon_emit(cmd_buffer->cs, 0xffffffff); /* CP_COHER_SIZE */
+ radeon_emit(cmd_buffer->cs, 0); /* CP_COHER_BASE */
+ radeon_emit(cmd_buffer->cs, 0x0000000A); /* POLL_INTERVAL */
+ }
- if (cmd_buffer->state.flush_bits)
- radv_cmd_buffer_trace_emit(cmd_buffer);
cmd_buffer->state.flush_bits = 0;
}
@@ -942,7 +720,7 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
{
struct radeon_winsys_cs *cs = cmd_buffer->cs;
uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
- uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM_GFX6(1) : 0;
+ uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM(1) : 0;
uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
uint32_t sel = flags & CIK_CP_DMA_USE_L2 ?
S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) |
@@ -953,7 +731,7 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+ if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
radeon_emit(cs, sync_flag | sel); /* CP_SYNC [31] */
radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
@@ -975,12 +753,10 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
* indices. If we wanted to execute CP DMA in PFP, this packet
* should precede it.
*/
- if (sync_flag && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
+ if (sync_flag) {
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0);
}
-
- radv_cmd_buffer_trace_emit(cmd_buffer);
}
/* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
@@ -990,7 +766,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
{
struct radeon_winsys_cs *cs = cmd_buffer->cs;
uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
- uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM_GFX6(1) : 0;
+ uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM(1) : 0;
uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? S_411_DSL_SEL(V_411_DST_ADDR_TC_L2) : 0;
@@ -999,7 +775,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+ if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
radeon_emit(cs, sync_flag | dst_sel | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */
radeon_emit(cs, clear_value); /* DATA [31:0] */
@@ -1017,11 +793,10 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
}
/* See "copy_buffer" for explanation. */
- if (sync_flag && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
+ if (sync_flag) {
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0);
}
- radv_cmd_buffer_trace_emit(cmd_buffer);
}
static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
@@ -1072,8 +847,8 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
uint64_t skipped_size = 0, realign_size = 0;
- if (cmd_buffer->device->physical_device->rad_info.family <= CHIP_CARRIZO ||
- cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY) {
+ if (cmd_buffer->device->instance->physicalDevice.rad_info.family <= CHIP_CARRIZO ||
+ cmd_buffer->device->instance->physicalDevice.rad_info.family == CHIP_STONEY) {
/* If the size is not aligned, we must add a dummy copy at the end
* just to align the internal counter. Otherwise, the DMA engine
* would slow down by an order of magnitude for following copies.
diff --git a/lib/mesa/src/amd/vulkan/vk_format.h b/lib/mesa/src/amd/vulkan/vk_format.h
index 13ac17934..58ee3f71f 100644
--- a/lib/mesa/src/amd/vulkan/vk_format.h
+++ b/lib/mesa/src/amd/vulkan/vk_format.h
@@ -24,13 +24,15 @@
* IN THE SOFTWARE.
*/
-#ifndef VK_FORMAT_H
-#define VK_FORMAT_H
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
#include <assert.h>
#include <vulkan/vulkan.h>
#include <util/macros.h>
-
enum vk_format_layout {
/**
* Formats with vk_format_block::width == vk_format_block::height == 1
@@ -444,5 +446,6 @@ vk_format_get_component_bits(VkFormat format,
return 0;
}
}
-
-#endif /* VK_FORMAT_H */
+#ifdef __cplusplus
+} // extern "C" {
+#endif
diff --git a/lib/mesa/src/amd/vulkan/vk_format_parse.py b/lib/mesa/src/amd/vulkan/vk_format_parse.py
index 00cf1adf5..b743fc2bd 100755
--- a/lib/mesa/src/amd/vulkan/vk_format_parse.py
+++ b/lib/mesa/src/amd/vulkan/vk_format_parse.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
'''
/**************************************************************************
diff --git a/lib/mesa/src/amd/vulkan/vk_format_table.py b/lib/mesa/src/amd/vulkan/vk_format_table.py
index 36352b108..06b98e568 100755
--- a/lib/mesa/src/amd/vulkan/vk_format_table.py
+++ b/lib/mesa/src/amd/vulkan/vk_format_table.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
CopyRight = '''
/**************************************************************************
diff --git a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
index 7b679450c..7319a9888 100644
--- a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
+++ b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
@@ -34,210 +34,19 @@
#include <amdgpu_drm.h>
#include <inttypes.h>
-#include "util/u_atomic.h"
-
-
-static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);
-
-static void
-radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
- const struct radv_amdgpu_map_range *range)
-{
- assert(range->size);
-
- if (!range->bo)
- return; /* TODO: PRT mapping */
-
- p_atomic_inc(&range->bo->ref_count);
- int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
- range->offset + bo->va, 0, AMDGPU_VA_OP_MAP);
- if (r)
- abort();
-}
-
-static void
-radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
- const struct radv_amdgpu_map_range *range)
-{
- assert(range->size);
-
- if (!range->bo)
- return; /* TODO: PRT mapping */
-
- int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
- range->offset + bo->va, 0, AMDGPU_VA_OP_UNMAP);
- if (r)
- abort();
- radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
-}
-
-static void
-radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
-{
- bo->bo_count = 0;
- for (uint32_t i = 0; i < bo->range_count; ++i) {
- bool found = false;
- if (!bo->ranges[i].bo)
- continue;
-
- for(uint32_t j = 0; j < bo->bo_count; ++j) {
- if (bo->bos[j] == bo->ranges[i].bo) {
- found = true;
- break;
- }
- }
-
- if (!found) {
- if (bo->bo_capacity == bo->bo_count) {
- bo->bos = realloc(bo->bos,
- (bo->bo_capacity + 1) * sizeof(struct radv_amdgpu_bo *));
- ++bo->bo_capacity;
- }
- bo->bos[bo->bo_count++] = bo->ranges[i].bo;
- }
- }
-}
-
-static void
-radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
- uint64_t offset, uint64_t size,
- struct radeon_winsys_bo *_bo, uint64_t bo_offset)
-{
- struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
- struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo*)_bo;
- int range_count_delta, new_idx;
- int first = 0, last;
- struct radv_amdgpu_map_range new_first, new_last;
-
- assert(parent->is_virtual);
- assert(!bo || !bo->is_virtual);
-
- if (!size)
- return;
-
- /* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that contains the newly bound range). */
- if (parent->range_capacity - parent->range_count < 2) {
- parent->range_capacity += 2;
- parent->ranges = realloc(parent->ranges,
- parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
- }
-
- /*
- * [first, last] is exactly the range of ranges that either overlap the
- * new parent, or are adjacent to it. This corresponds to the bind ranges
- * that may change.
- */
- while(first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
- ++first;
-
- last = first;
- while(last + 1 < parent->range_count && parent->ranges[last].offset <= offset + size)
- ++last;
-
- /* Whether the first or last range are going to be totally removed or just
- * resized/left alone. Note that in the case of first == last, we will split
- * this into a part before and after the new range. The remove flag is then
- * whether to not create the corresponding split part. */
- bool remove_first = parent->ranges[first].offset == offset;
- bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
- bool unmapped_first = false;
-
- assert(parent->ranges[first].offset <= offset);
- assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
-
- /* Try to merge the new range with the first range. */
- if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
- size += offset - parent->ranges[first].offset;
- offset = parent->ranges[first].offset;
- remove_first = true;
- }
-
- /* Try to merge the new range with the last range. */
- if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
- size = parent->ranges[last].offset + parent->ranges[last].size - offset;
- remove_last = true;
- }
-
- range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
- new_idx = first + !remove_first;
-
- /* Any range between first and last is going to be entirely covered by the new range so just unmap them. */
- for (int i = first + 1; i < last; ++i)
- radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);
-
- /* If the first/last range are not left alone we unmap then and optionally map
- * them again after modifications. Not that this implicitly can do the splitting
- * if first == last. */
- new_first = parent->ranges[first];
- new_last = parent->ranges[last];
-
- if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
- radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
- unmapped_first = true;
-
- if (!remove_first) {
- new_first.size = offset - new_first.offset;
- radv_amdgpu_winsys_virtual_map(parent, &new_first);
- }
- }
-
- if (parent->ranges[last].offset < offset + size || remove_last) {
- if (first != last || !unmapped_first)
- radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);
-
- if (!remove_last) {
- new_last.size -= offset + size - new_last.offset;
- new_last.offset = offset + size;
- radv_amdgpu_winsys_virtual_map(parent, &new_last);
- }
- }
-
- /* Moves the range list after last to account for the changed number of ranges. */
- memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
- sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));
-
- if (!remove_first)
- parent->ranges[first] = new_first;
-
- if (!remove_last)
- parent->ranges[new_idx + 1] = new_last;
-
- /* Actually set up the new range. */
- parent->ranges[new_idx].offset = offset;
- parent->ranges[new_idx].size = size;
- parent->ranges[new_idx].bo = bo;
- parent->ranges[new_idx].bo_offset = bo_offset;
-
- radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);
-
- parent->range_count += range_count_delta;
-
- radv_amdgpu_winsys_rebuild_bo_list(parent);
-}
-
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- if (p_atomic_dec_return(&bo->ref_count))
- return;
- if (bo->is_virtual) {
- for (uint32_t i = 0; i < bo->range_count; ++i) {
- radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
- }
- free(bo->bos);
- free(bo->ranges);
- } else {
- if (bo->ws->debug_all_bos) {
- pthread_mutex_lock(&bo->ws->global_bo_list_lock);
- LIST_DEL(&bo->global_list_item);
- bo->ws->num_buffers--;
- pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
- }
- amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
- amdgpu_bo_free(bo->bo);
+ if (bo->ws->debug_all_bos) {
+ pthread_mutex_lock(&bo->ws->global_bo_list_lock);
+ LIST_DEL(&bo->global_list_item);
+ bo->ws->num_buffers--;
+ pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
}
+ amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
amdgpu_va_range_free(bo->va_handle);
+ amdgpu_bo_free(bo->bo);
FREE(bo);
}
@@ -272,32 +81,6 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
return NULL;
}
- r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
- size, alignment, 0, &va, &va_handle, 0);
- if (r)
- goto error_va_alloc;
-
- bo->va = va;
- bo->va_handle = va_handle;
- bo->size = size;
- bo->ws = ws;
- bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
- bo->ref_count = 1;
-
- if (flags & RADEON_FLAG_VIRTUAL) {
- bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
- bo->range_count = 1;
- bo->range_capacity = 1;
-
- bo->ranges[0].offset = 0;
- bo->ranges[0].size = size;
- bo->ranges[0].bo = NULL;
- bo->ranges[0].bo_offset = 0;
-
- radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
- return (struct radeon_winsys_bo *)bo;
- }
-
request.alloc_size = size;
request.phys_alignment = alignment;
@@ -322,22 +105,31 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
goto error_bo_alloc;
}
+ r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
+ size, alignment, 0, &va, &va_handle, 0);
+ if (r)
+ goto error_va_alloc;
+
r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
if (r)
goto error_va_map;
bo->bo = buf_handle;
+ bo->va = va;
+ bo->va_handle = va_handle;
bo->initial_domain = initial_domain;
+ bo->size = size;
bo->is_shared = false;
+ bo->ws = ws;
radv_amdgpu_add_buffer_to_global_list(bo);
return (struct radeon_winsys_bo *)bo;
error_va_map:
- amdgpu_bo_free(buf_handle);
-
-error_bo_alloc:
amdgpu_va_range_free(va_handle);
error_va_alloc:
+ amdgpu_bo_free(buf_handle);
+
+error_bo_alloc:
FREE(bo);
return NULL;
}
@@ -413,8 +205,6 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
bo->initial_domain = initial;
bo->size = result.alloc_size;
bo->is_shared = true;
- bo->ws = ws;
- radv_amdgpu_add_buffer_to_global_list(bo);
return (struct radeon_winsys_bo *)bo;
error_va_map:
amdgpu_va_range_free(va_handle);
@@ -504,5 +294,4 @@ void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
- ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
}
diff --git a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
index 4512e76b3..499b063d5 100644
--- a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
+++ b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
@@ -31,41 +31,17 @@
#include "radv_amdgpu_winsys.h"
-
-struct radv_amdgpu_map_range {
- uint64_t offset;
- uint64_t size;
- struct radv_amdgpu_winsys_bo *bo;
- uint64_t bo_offset;
-};
-
struct radv_amdgpu_winsys_bo {
+ amdgpu_bo_handle bo;
amdgpu_va_handle va_handle;
+
uint64_t va;
+ enum radeon_bo_domain initial_domain;
uint64_t size;
- struct radv_amdgpu_winsys *ws;
- bool is_virtual;
- int ref_count;
+ bool is_shared;
- union {
- /* physical bo */
- struct {
- amdgpu_bo_handle bo;
- enum radeon_bo_domain initial_domain;
- bool is_shared;
- struct list_head global_list_item;
- };
- /* virtual bo */
- struct {
- struct radv_amdgpu_map_range *ranges;
- uint32_t range_count;
- uint32_t range_capacity;
-
- struct radv_amdgpu_winsys_bo **bos;
- uint32_t bo_count;
- uint32_t bo_capacity;
- };
- };
+ struct radv_amdgpu_winsys *ws;
+ struct list_head global_list_item;
};
static inline
diff --git a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index ca7d647fd..b8558fafc 100644
--- a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -27,18 +27,12 @@
#include <amdgpu_drm.h>
#include <assert.h>
-#include "ac_debug.h"
#include "amdgpu_id.h"
#include "radv_radeon_winsys.h"
#include "radv_amdgpu_cs.h"
#include "radv_amdgpu_bo.h"
#include "sid.h"
-
-enum {
- VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024
-};
-
struct radv_amdgpu_cs {
struct radeon_winsys_cs base;
struct radv_amdgpu_winsys *ws;
@@ -60,13 +54,6 @@ struct radv_amdgpu_cs {
bool is_chained;
int buffer_hash_table[1024];
- unsigned hw_ip;
-
- unsigned num_virtual_buffers;
- unsigned max_num_virtual_buffers;
- struct radeon_winsys_bo **virtual_buffers;
- uint8_t *virtual_buffer_priorities;
- int *virtual_buffer_hash_table;
};
static inline struct radv_amdgpu_cs *
@@ -75,30 +62,6 @@ radv_amdgpu_cs(struct radeon_winsys_cs *base)
return (struct radv_amdgpu_cs*)base;
}
-static int ring_to_hw_ip(enum ring_type ring)
-{
- switch (ring) {
- case RING_GFX:
- return AMDGPU_HW_IP_GFX;
- case RING_DMA:
- return AMDGPU_HW_IP_DMA;
- case RING_COMPUTE:
- return AMDGPU_HW_IP_COMPUTE;
- default:
- unreachable("unsupported ring");
- }
-}
-
-static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
- struct amdgpu_cs_fence *fence,
- struct amdgpu_cs_request *req)
-{
- fence->context = ctx->ctx;
- fence->ip_type = req->ip_type;
- fence->ip_instance = req->ip_instance;
- fence->ring = req->ring;
- fence->fence = req->seq_no;
-}
static struct radeon_winsys_fence *radv_amdgpu_create_fence()
{
@@ -152,9 +115,6 @@ static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
free(cs->old_ib_buffers);
- free(cs->virtual_buffers);
- free(cs->virtual_buffer_priorities);
- free(cs->virtual_buffer_hash_table);
free(cs->handles);
free(cs->priorities);
free(cs);
@@ -166,7 +126,6 @@ static boolean radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
cs->buffer_hash_table[i] = -1;
- cs->hw_ip = ring_to_hw_ip(ring_type);
return true;
}
@@ -181,7 +140,7 @@ radv_amdgpu_cs_create(struct radeon_winsys *ws,
return NULL;
cs->ws = radv_amdgpu_winsys(ws);
- radv_amdgpu_init_cs(cs, ring_type);
+ radv_amdgpu_init_cs(cs, RING_GFX);
if (cs->ws->use_ib_bos) {
cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
@@ -329,13 +288,7 @@ static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
cs->buffer_hash_table[hash] = -1;
}
- for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
- unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
- cs->virtual_buffer_hash_table[hash] = -1;
- }
-
cs->num_buffers = 0;
- cs->num_virtual_buffers = 0;
if (cs->ws->use_ib_bos) {
cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);
@@ -400,49 +353,6 @@ static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
++cs->num_buffers;
}
-static void radv_amdgpu_cs_add_virtual_buffer(struct radeon_winsys_cs *_cs,
- struct radeon_winsys_bo *bo,
- uint8_t priority)
-{
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
- unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
-
-
- if (!cs->virtual_buffer_hash_table) {
- cs->virtual_buffer_hash_table = malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
- for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
- cs->virtual_buffer_hash_table[i] = -1;
- }
-
- if (cs->virtual_buffer_hash_table[hash] >= 0) {
- int idx = cs->virtual_buffer_hash_table[hash];
- if (cs->virtual_buffers[idx] == bo) {
- cs->virtual_buffer_priorities[idx] = MAX2(cs->virtual_buffer_priorities[idx], priority);
- return;
- }
- for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
- if (cs->virtual_buffers[i] == bo) {
- cs->virtual_buffer_priorities[i] = MAX2(cs->virtual_buffer_priorities[i], priority);
- cs->virtual_buffer_hash_table[hash] = i;
- return;
- }
- }
- }
-
- if(cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
- cs->max_num_virtual_buffers = MAX2(2, cs->max_num_virtual_buffers * 2);
- cs->virtual_buffers = realloc(cs->virtual_buffers, sizeof(struct radv_amdgpu_virtual_virtual_buffer*) * cs->max_num_virtual_buffers);
- cs->virtual_buffer_priorities = realloc(cs->virtual_buffer_priorities, sizeof(uint8_t) * cs->max_num_virtual_buffers);
- }
-
- cs->virtual_buffers[cs->num_virtual_buffers] = bo;
- cs->virtual_buffer_priorities[cs->num_virtual_buffers] = priority;
-
- cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
- ++cs->num_virtual_buffers;
-
-}
-
static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
struct radeon_winsys_bo *_bo,
uint8_t priority)
@@ -450,11 +360,6 @@ static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- if (bo->is_virtual) {
- radv_amdgpu_cs_add_virtual_buffer(_cs, _bo, priority);
- return;
- }
-
radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
}
@@ -469,11 +374,6 @@ static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
child->priorities[i]);
}
- for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
- radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i],
- child->virtual_buffer_priorities[i]);
- }
-
if (parent->ws->use_ib_bos) {
if (parent->base.cdw + 4 > parent->base.max_dw)
radv_amdgpu_cs_grow(&parent->base, 4);
@@ -495,7 +395,6 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
struct radeon_winsys_cs **cs_array,
unsigned count,
struct radv_amdgpu_winsys_bo *extra_bo,
- struct radeon_winsys_cs *extra_cs,
amdgpu_bo_list_handle *bo_list)
{
int r;
@@ -522,8 +421,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
bo_list);
free(handles);
pthread_mutex_unlock(&ws->global_bo_list_lock);
- } else if (count == 1 && !extra_bo && !extra_cs &&
- !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
+ } else if (count == 1 && !extra_bo) {
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
cs->priorities, bo_list);
@@ -533,12 +431,6 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
for (unsigned i = 0; i < count; ++i) {
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
total_buffer_count += cs->num_buffers;
- for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
- total_buffer_count += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
- }
-
- if (extra_cs) {
- total_buffer_count += ((struct radv_amdgpu_cs*)extra_cs)->num_buffers;
}
amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count);
@@ -554,27 +446,11 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
priorities[0] = 8;
}
- for (unsigned i = 0; i < count + !!extra_cs; ++i) {
- struct radv_amdgpu_cs *cs;
-
- if (i == count)
- cs = (struct radv_amdgpu_cs*)extra_cs;
- else
- cs = (struct radv_amdgpu_cs*)cs_array[i];
-
- if (!cs->num_buffers)
- continue;
-
- if (unique_bo_count == 0) {
- memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle));
- memcpy(priorities, cs->priorities, cs->num_buffers * sizeof(uint8_t));
- unique_bo_count = cs->num_buffers;
- continue;
- }
- int unique_bo_so_far = unique_bo_count;
+ for (unsigned i = 0; i < count; ++i) {
+ struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
for (unsigned j = 0; j < cs->num_buffers; ++j) {
bool found = false;
- for (unsigned k = 0; k < unique_bo_so_far; ++k) {
+ for (unsigned k = 0; k < unique_bo_count; ++k) {
if (handles[k] == cs->handles[j]) {
found = true;
priorities[k] = MAX2(priorities[k],
@@ -588,26 +464,6 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
++unique_bo_count;
}
}
- for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
- struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
- for(unsigned k = 0; k < virtual_bo->bo_count; ++k) {
- struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
- bool found = false;
- for (unsigned m = 0; m < unique_bo_count; ++m) {
- if (handles[m] == bo->bo) {
- found = true;
- priorities[m] = MAX2(priorities[m],
- cs->virtual_buffer_priorities[j]);
- break;
- }
- }
- if (!found) {
- handles[unique_bo_count] = bo->bo;
- priorities[unique_bo_count] = cs->virtual_buffer_priorities[j];
- ++unique_bo_count;
- }
- }
- }
}
r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
priorities, bo_list);
@@ -619,20 +475,9 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
return r;
}
-static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
- struct amdgpu_cs_request *request)
-{
- radv_amdgpu_request_to_fence(ctx,
- &ctx->last_submission[request->ip_type][request->ring],
- request);
-}
-
static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
- int queue_idx,
struct radeon_winsys_cs **cs_array,
unsigned cs_count,
- struct radeon_winsys_cs *initial_preamble_cs,
- struct radeon_winsys_cs *continue_preamble_cs,
struct radeon_winsys_fence *_fence)
{
int r;
@@ -641,7 +486,6 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
amdgpu_bo_list_handle bo_list;
struct amdgpu_cs_request request = {0};
- struct amdgpu_cs_ib_info ibs[2];
for (unsigned i = cs_count; i--;) {
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
@@ -665,25 +509,17 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
}
}
- r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, initial_preamble_cs, &bo_list);
+ r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, &bo_list);
if (r) {
fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n");
return r;
}
- request.ip_type = cs0->hw_ip;
- request.ring = queue_idx;
+ request.ip_type = AMDGPU_HW_IP_GFX;
request.number_of_ibs = 1;
request.ibs = &cs0->ib;
request.resources = bo_list;
- if (initial_preamble_cs) {
- request.ibs = ibs;
- request.number_of_ibs = 2;
- ibs[1] = cs0->ib;
- ibs[0] = ((struct radv_amdgpu_cs*)initial_preamble_cs)->ib;
- }
-
r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
if (r) {
if (r == -ENOMEM)
@@ -695,20 +531,21 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
amdgpu_bo_list_destroy(bo_list);
- if (fence)
- radv_amdgpu_request_to_fence(ctx, fence, &request);
-
- radv_assign_last_submit(ctx, &request);
+ if (fence) {
+ fence->context = ctx->ctx;
+ fence->ip_type = request.ip_type;
+ fence->ip_instance = request.ip_instance;
+ fence->ring = request.ring;
+ fence->fence = request.seq_no;
+ }
+ ctx->last_seq_no = request.seq_no;
return r;
}
static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
- int queue_idx,
struct radeon_winsys_cs **cs_array,
unsigned cs_count,
- struct radeon_winsys_cs *initial_preamble_cs,
- struct radeon_winsys_cs *continue_preamble_cs,
struct radeon_winsys_fence *_fence)
{
int r;
@@ -722,32 +559,24 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
for (unsigned i = 0; i < cs_count;) {
struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
- struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
- unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs,
- cs_count - i);
+ unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT, cs_count - i);
memset(&request, 0, sizeof(request));
- r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL,
- preamble_cs, &bo_list);
+ r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, &bo_list);
if (r) {
fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n");
return r;
}
- request.ip_type = cs0->hw_ip;
- request.ring = queue_idx;
+ request.ip_type = AMDGPU_HW_IP_GFX;
request.resources = bo_list;
- request.number_of_ibs = cnt + !!preamble_cs;
+ request.number_of_ibs = cnt;
request.ibs = ibs;
- if (preamble_cs) {
- ibs[0] = radv_amdgpu_cs(preamble_cs)->ib;
- }
-
for (unsigned j = 0; j < cnt; ++j) {
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
- ibs[j + !!preamble_cs] = cs->ib;
+ ibs[j] = cs->ib;
if (cs->is_chained) {
*cs->ib_size_ptr -= 4;
@@ -771,20 +600,21 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
i += cnt;
}
- if (fence)
- radv_amdgpu_request_to_fence(ctx, fence, &request);
-
- radv_assign_last_submit(ctx, &request);
+ if (fence) {
+ fence->context = ctx->ctx;
+ fence->ip_type = request.ip_type;
+ fence->ip_instance = request.ip_instance;
+ fence->ring = request.ring;
+ fence->fence = request.seq_no;
+ }
+ ctx->last_seq_no = request.seq_no;
return 0;
}
static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
- int queue_idx,
struct radeon_winsys_cs **cs_array,
unsigned cs_count,
- struct radeon_winsys_cs *initial_preamble_cs,
- struct radeon_winsys_cs *continue_preamble_cs,
struct radeon_winsys_fence *_fence)
{
int r;
@@ -804,14 +634,10 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
for (unsigned i = 0; i < cs_count;) {
struct amdgpu_cs_ib_info ib = {0};
struct radeon_winsys_bo *bo = NULL;
- struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
uint32_t *ptr;
unsigned cnt = 0;
unsigned size = 0;
- if (preamble_cs)
- size += preamble_cs->cdw;
-
while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
++cnt;
@@ -822,11 +648,6 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
ptr = ws->buffer_map(bo);
- if (preamble_cs) {
- memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
- ptr += preamble_cs->cdw;
- }
-
for (unsigned j = 0; j < cnt; ++j) {
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
@@ -843,8 +664,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
- (struct radv_amdgpu_winsys_bo*)bo,
- preamble_cs, &bo_list);
+ (struct radv_amdgpu_winsys_bo*)bo, &bo_list);
if (r) {
fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n");
return r;
@@ -853,8 +673,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
ib.size = size;
ib.ib_mc_address = ws->buffer_get_va(bo);
- request.ip_type = cs0->hw_ip;
- request.ring = queue_idx;
+ request.ip_type = AMDGPU_HW_IP_GFX;
request.resources = bo_list;
request.number_of_ibs = 1;
request.ibs = &ib;
@@ -876,92 +695,35 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
i += cnt;
}
- if (fence)
- radv_amdgpu_request_to_fence(ctx, fence, &request);
-
- radv_assign_last_submit(ctx, &request);
+ if (fence) {
+ fence->context = ctx->ctx;
+ fence->ip_type = request.ip_type;
+ fence->ip_instance = request.ip_instance;
+ fence->ring = request.ring;
+ fence->fence = request.seq_no;
+ }
+ ctx->last_seq_no = request.seq_no;
return 0;
}
static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
- int queue_idx,
struct radeon_winsys_cs **cs_array,
unsigned cs_count,
- struct radeon_winsys_cs *initial_preamble_cs,
- struct radeon_winsys_cs *continue_preamble_cs,
- struct radeon_winsys_sem **wait_sem,
- unsigned wait_sem_count,
- struct radeon_winsys_sem **signal_sem,
- unsigned signal_sem_count,
bool can_patch,
struct radeon_winsys_fence *_fence)
{
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
- struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
- int ret;
- int i;
-
- for (i = 0; i < wait_sem_count; i++) {
- amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)wait_sem[i];
- amdgpu_cs_wait_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
- sem);
- }
if (!cs->ws->use_ib_bos) {
- ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
- cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
+ return radv_amdgpu_winsys_cs_submit_sysmem(_ctx, cs_array,
+ cs_count, _fence);
} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
- ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
- cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
+ return radv_amdgpu_winsys_cs_submit_chained(_ctx, cs_array,
+ cs_count, _fence);
} else {
- ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, cs_array,
- cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
- }
-
- for (i = 0; i < signal_sem_count; i++) {
- amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)signal_sem[i];
- amdgpu_cs_signal_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
- sem);
- }
- return ret;
-}
-
-
-static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
-{
- struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
- void *ret = NULL;
-
- if (!cs->ib_buffer)
- return NULL;
- for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
- struct radv_amdgpu_winsys_bo *bo;
-
- bo = (struct radv_amdgpu_winsys_bo*)
- (i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
- if (addr >= bo->va && addr - bo->va < bo->size) {
- if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
- return (char *)ret + (addr - bo->va);
- }
- }
- return ret;
-}
-
-static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
- FILE* file,
- uint32_t trace_id)
-{
- struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
- void *ib = cs->base.buf;
- int num_dw = cs->base.cdw;
-
- if (cs->ws->use_ib_bos) {
- ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address);
- num_dw = cs->ib.size;
+ return radv_amdgpu_winsys_cs_submit_fallback(_ctx, cs_array,
+ cs_count, _fence);
}
- assert(ib);
- ac_parse_ib(file, ib, num_dw, trace_id, "main IB", cs->ws->info.chip_class,
- radv_amdgpu_winsys_get_cpu_addr, cs);
}
static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
@@ -980,7 +742,6 @@ static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_w
ctx->ws = ws;
return (struct radeon_winsys_ctx *)ctx;
error_create:
- FREE(ctx);
return NULL;
}
@@ -991,16 +752,22 @@ static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
FREE(ctx);
}
-static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
- enum ring_type ring_type, int ring_index)
+static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx)
{
struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
- int ip_type = ring_to_hw_ip(ring_type);
- if (ctx->last_submission[ip_type][ring_index].fence) {
+ if (ctx->last_seq_no) {
uint32_t expired;
- int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index],
- 1000000000ull, 0, &expired);
+ struct amdgpu_cs_fence fence;
+
+ fence.context = ctx->ctx;
+ fence.ip_type = RING_GFX;
+ fence.ip_instance = 0;
+ fence.ring = 0;
+ fence.fence = ctx->last_seq_no;
+
+ int ret = amdgpu_cs_query_fence_status(&fence, 1000000000ull, 0,
+ &expired);
if (ret || !expired)
return false;
@@ -1009,23 +776,6 @@ static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
return true;
}
-static struct radeon_winsys_sem *radv_amdgpu_create_sem(struct radeon_winsys *_ws)
-{
- int ret;
- amdgpu_semaphore_handle sem;
-
- ret = amdgpu_cs_create_semaphore(&sem);
- if (ret)
- return NULL;
- return (struct radeon_winsys_sem *)sem;
-}
-
-static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem)
-{
- amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)_sem;
- amdgpu_cs_destroy_semaphore(sem);
-}
-
void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
{
ws->base.ctx_create = radv_amdgpu_ctx_create;
@@ -1039,10 +789,7 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
- ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
ws->base.create_fence = radv_amdgpu_create_fence;
ws->base.destroy_fence = radv_amdgpu_destroy_fence;
- ws->base.create_sem = radv_amdgpu_create_sem;
- ws->base.destroy_sem = radv_amdgpu_destroy_sem;
ws->base.fence_wait = radv_amdgpu_fence_wait;
}
diff --git a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
index fc6a2c8ef..affee9528 100644
--- a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
+++ b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
@@ -38,14 +38,10 @@
#include "radv_radeon_winsys.h"
#include "radv_amdgpu_winsys.h"
-enum {
- MAX_RINGS_PER_TYPE = 8
-};
-
struct radv_amdgpu_ctx {
struct radv_amdgpu_winsys *ws;
amdgpu_context_handle ctx;
- struct amdgpu_cs_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];
+ uint64_t last_seq_no;
};
static inline struct radv_amdgpu_ctx *
diff --git a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
index 511f464df..02aad3c81 100644
--- a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
+++ b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
@@ -140,6 +140,7 @@ ADDR_HANDLE radv_amdgpu_addr_create(struct amdgpu_gpu_info *amdinfo, int family,
createFlags.value = 0;
createFlags.useTileIndex = 1;
+ createFlags.degradeBaseLevel = 1;
addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
addrCreateInput.chipFamily = family;
@@ -259,30 +260,6 @@ static int radv_compute_level(ADDR_HANDLE addrlib,
}
}
- if (!is_stencil && AddrSurfInfoIn->flags.depth &&
- surf_level->mode == RADEON_SURF_MODE_2D && level == 0) {
- ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
- ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
- AddrHtileIn.flags.tcCompatible = AddrSurfInfoIn->flags.tcCompatible;
- AddrHtileIn.pitch = AddrSurfInfoOut->pitch;
- AddrHtileIn.height = AddrSurfInfoOut->height;
- AddrHtileIn.numSlices = AddrSurfInfoOut->depth;
- AddrHtileIn.blockWidth = ADDR_HTILE_BLOCKSIZE_8;
- AddrHtileIn.blockHeight = ADDR_HTILE_BLOCKSIZE_8;
- AddrHtileIn.pTileInfo = AddrSurfInfoOut->pTileInfo;
- AddrHtileIn.tileIndex = AddrSurfInfoOut->tileIndex;
- AddrHtileIn.macroModeIndex = AddrSurfInfoOut->macroModeIndex;
-
- ret = AddrComputeHtileInfo(addrlib,
- &AddrHtileIn,
- &AddrHtileOut);
-
- if (ret == ADDR_OK) {
- surf->htile_size = AddrHtileOut.htileBytes;
- surf->htile_slice_size = AddrHtileOut.sliceSize;
- surf->htile_alignment = AddrHtileOut.baseAlign;
- }
- }
return 0;
}
@@ -297,19 +274,6 @@ static void radv_set_micro_tile_mode(struct radeon_surf *surf,
surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
}
-static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
-{
- unsigned index, tileb;
-
- tileb = 8 * 8 * surf->bpe;
- tileb = MIN2(surf->tile_split, tileb);
-
- for (index = 0; tileb > 64; index++)
- tileb >>= 1;
-
- assert(index < 16);
- return index;
-}
static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
struct radeon_surf *surf)
@@ -397,7 +361,7 @@ static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP;
AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
- AddrSurfInfoIn.flags.opt4Space = 1;
+ AddrSurfInfoIn.flags.degrade4Space = 1;
/* DCC notes:
* - If we add MSAA support, keep in mind that CB can't decompress 8bpp
@@ -436,7 +400,7 @@ static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
AddrTileInfoIn.macroAspectRatio = surf->mtilea;
AddrTileInfoIn.tileSplitBytes = surf->tile_split;
AddrTileInfoIn.pipeConfig = surf->pipe_config + 1; /* +1 compared to GB_TILE_MODE */
- AddrSurfInfoIn.flags.opt4Space = 0;
+ AddrSurfInfoIn.flags.degrade4Space = 0;
AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
/* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
@@ -471,22 +435,19 @@ static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
else
AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
- AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
}
}
surf->bo_size = 0;
surf->dcc_size = 0;
surf->dcc_alignment = 1;
- surf->htile_size = surf->htile_slice_size = 0;
- surf->htile_alignment = 1;
/* Calculate texture layout information. */
for (level = 0; level <= surf->last_level; level++) {
r = radv_compute_level(ws->addrlib, surf, false, level, type, compressed,
&AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
if (r)
- break;
+ return r;
if (level == 0) {
surf->bo_alignment = AddrSurfInfoOut.baseAlign;
diff --git a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
index 629da3153..045610072 100644
--- a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
+++ b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
@@ -27,7 +27,6 @@
#include "radv_amdgpu_winsys.h"
#include "radv_amdgpu_winsys_public.h"
#include "radv_amdgpu_surface.h"
-#include "radv_debug.h"
#include "amdgpu_id.h"
#include "xf86drm.h"
#include <stdio.h>
@@ -107,7 +106,6 @@ get_chip_name(enum radeon_family family)
case CHIP_FIJI: return "AMD RADV FIJI";
case CHIP_POLARIS10: return "AMD RADV POLARIS10";
case CHIP_POLARIS11: return "AMD RADV POLARIS11";
- case CHIP_POLARIS12: return "AMD RADV POLARIS12";
case CHIP_STONEY: return "AMD RADV STONEY";
default: return "AMD RADV unknown";
}
@@ -118,16 +116,15 @@ static bool
do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
{
struct amdgpu_buffer_size_alignments alignment_info = {};
- struct amdgpu_heap_info vram, visible_vram, gtt;
+ struct amdgpu_heap_info vram, gtt;
struct drm_amdgpu_info_hw_ip dma = {};
- struct drm_amdgpu_info_hw_ip compute = {};
drmDevicePtr devinfo;
int r;
int i, j;
/* Get PCI info. */
- r = drmGetDevice2(fd, 0, &devinfo);
+ r = drmGetDevice(fd, &devinfo);
if (r) {
- fprintf(stderr, "amdgpu: drmGetDevice2 failed.\n");
+ fprintf(stderr, "amdgpu: drmGetDevice failed.\n");
goto fail;
}
ws->info.pci_domain = devinfo->businfo.pci->domain;
@@ -155,13 +152,6 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
goto fail;
}
- r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &visible_vram);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_heap_info(visible_vram) failed.\n");
- goto fail;
- }
-
r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &gtt);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
@@ -173,12 +163,6 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n");
goto fail;
}
-
- r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_COMPUTE, 0, &compute);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(compute) failed.\n");
- goto fail;
- }
ws->info.pci_id = ws->amdinfo.asic_id; /* TODO: is this correct? */
ws->info.vce_harvest_config = ws->amdinfo.vce_harvest_config;
@@ -272,10 +256,6 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
ws->family = FAMILY_VI;
ws->rev_id = VI_POLARIS11_M_A0;
break;
- case CHIP_POLARIS12:
- ws->family = FAMILY_VI;
- ws->rev_id = VI_POLARIS12_V_A0;
- break;
default:
fprintf(stderr, "amdgpu: Unknown family.\n");
goto fail;
@@ -286,15 +266,10 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
goto fail;
}
-
- assert(util_is_power_of_two(dma.available_rings + 1));
- assert(util_is_power_of_two(compute.available_rings + 1));
-
/* Set hardware information. */
ws->info.name = get_chip_name(ws->info.family);
ws->info.gart_size = gtt.heap_size;
ws->info.vram_size = vram.heap_size;
- ws->info.visible_vram_size = visible_vram.heap_size;
/* convert the shader clock from KHz to MHz */
ws->info.max_shader_clock = ws->amdinfo.max_engine_clk / 1000;
ws->info.max_se = ws->amdinfo.num_shader_engines;
@@ -307,10 +282,7 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
ws->info.num_tile_pipes = radv_cik_get_num_tile_pipes(&ws->amdinfo);
ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 0x7);
ws->info.has_virtual_memory = TRUE;
- ws->info.sdma_rings = MIN2(util_bitcount(dma.available_rings),
- MAX_RINGS_PER_TYPE);
- ws->info.compute_rings = MIN2(util_bitcount(compute.available_rings),
- MAX_RINGS_PER_TYPE);
+ ws->info.has_sdma = dma.available_rings != 0;
/* Get the number of good compute units. */
ws->info.num_good_compute_units = 0;
@@ -353,7 +325,7 @@ static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
}
struct radeon_winsys *
-radv_amdgpu_winsys_create(int fd, uint32_t debug_flags)
+radv_amdgpu_winsys_create(int fd)
{
uint32_t drm_major, drm_minor, r;
amdgpu_device_handle dev;
@@ -373,10 +345,7 @@ radv_amdgpu_winsys_create(int fd, uint32_t debug_flags)
if (!do_winsys_init(ws, fd))
goto winsys_fail;
- ws->debug_all_bos = !!(debug_flags & RADV_DEBUG_ALL_BOS);
- if (debug_flags & RADV_DEBUG_NO_IBS)
- ws->use_ib_bos = false;
-
+ ws->debug_all_bos = getenv("RADV_DEBUG_ALL_BOS") ? true : false;
LIST_INITHEAD(&ws->global_bo_list);
pthread_mutex_init(&ws->global_bo_list_lock, NULL);
ws->base.query_info = radv_amdgpu_winsys_query_info;
diff --git a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h
index d5d0ff52c..208561db9 100644
--- a/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h
+++ b/lib/mesa/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h
@@ -29,6 +29,6 @@
#ifndef RADV_AMDGPU_WINSYS_PUBLIC_H
#define RADV_AMDGPU_WINSYS_PUBLIC_H
-struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint32_t debug_flags);
+struct radeon_winsys *radv_amdgpu_winsys_create(int fd);
#endif /* RADV_AMDGPU_WINSYS_PUBLIC_H */