author    | Jonathan Gray <jsg@cvs.openbsd.org> | 2023-11-02 04:34:57 +0000
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2023-11-02 04:34:57 +0000
commit    | 32aeb3c41fedbbd7b11aacfec48e8f699d16bff0 (patch)
tree      | fc5893a490729ebf6b87b83eebf5d4ebfdfccf27 /lib/mesa/src/intel/vulkan
parent    | 286ec9d289bada8abb84753c461cfa3432866e98 (diff)
Import Mesa 23.1.9
Diffstat (limited to 'lib/mesa/src/intel/vulkan')
47 files changed, 5966 insertions, 386 deletions
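Editor's note before the raw diff: the first hunk in anv_android.c drops the old trick of hiding a STATIC_ASSERT inside a never-called function and replaces it with a file-scope C11 static_assert. The following is a minimal standalone sketch of the two forms, not code from the commit; the magic values are placeholders (the real definitions come from the Android hwvulkan and Vulkan ICD loader headers), and a GCC/Clang toolchain is assumed for the unused attribute.

    #include <assert.h>   /* C11: static_assert is a macro for _Static_assert */

    /* Placeholder values for illustration only; the real constants are
     * defined in hardware/hwvulkan.h and vk_icd.h and must be equal for
     * the HAL dispatch trick to work. */
    #define HWVULKAN_DISPATCH_MAGIC 0x01CDC0DE
    #define ICD_LOADER_MAGIC        0x01CDC0DE

    /* Old pattern: wrap the assertion in an unused function so that it is
     * accepted at file scope even without C11 file-scope assertions. */
    static void __attribute__((unused))
    static_asserts(void)
    {
       _Static_assert(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC, "");
    }

    /* New pattern, as in the hunk below: assert directly at file scope. */
    static_assert(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC, "");

    int main(void) { return 0; }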
diff --git a/lib/mesa/src/intel/vulkan/anv_android.c b/lib/mesa/src/intel/vulkan/anv_android.c index 8a17f0a24..6e98763dd 100644 --- a/lib/mesa/src/intel/vulkan/anv_android.c +++ b/lib/mesa/src/intel/vulkan/anv_android.c @@ -34,17 +34,14 @@ #include <sync/sync.h> #include "anv_private.h" +#include "vk_android.h" #include "vk_common_entrypoints.h" #include "vk_util.h" static int anv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev); static int anv_hal_close(struct hw_device_t *dev); -static void UNUSED -static_asserts(void) -{ - STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC); -} +static_assert(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC, ""); PUBLIC struct hwvulkan_module_t HAL_MODULE_INFO_SYM = { .common = { @@ -142,8 +139,8 @@ vk_format_from_android(unsigned android_format, unsigned android_usage) } } -static inline unsigned -android_format_from_vk(unsigned vk_format) +unsigned +anv_ahb_format_for_vk_format(VkFormat vk_format) { switch (vk_format) { case VK_FORMAT_R8G8B8A8_UNORM: @@ -167,12 +164,6 @@ android_format_from_vk(unsigned vk_format) } } -static VkFormatFeatureFlags -features2_to_features(VkFormatFeatureFlags2 features2) -{ - return features2 & VK_ALL_FORMAT_FEATURE_FLAG_BITS; -} - static VkResult get_ahw_buffer_format_properties2( VkDevice device_h, @@ -201,9 +192,9 @@ get_ahw_buffer_format_properties2( VkAndroidHardwareBufferFormatProperties2ANDROID *p = pProperties; p->format = vk_format_from_android(desc.format, desc.usage); + p->externalFormat = p->format; const struct anv_format *anv_format = anv_get_format(p->format); - p->externalFormat = (uint64_t) (uintptr_t) anv_format; /* Default to OPTIMAL tiling but set to linear in case * of AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER usage. @@ -214,7 +205,7 @@ get_ahw_buffer_format_properties2( tiling = VK_IMAGE_TILING_LINEAR; p->formatFeatures = - anv_get_image_format_features2(device->info, p->format, anv_format, + anv_get_image_format_features2(device->physical, p->format, anv_format, tiling, NULL); /* "Images can be created with an external format even if the Android hardware @@ -274,7 +265,7 @@ anv_GetAndroidHardwareBufferPropertiesANDROID( format_prop->format = format_prop2.format; format_prop->externalFormat = format_prop2.externalFormat; format_prop->formatFeatures = - features2_to_features(format_prop2.formatFeatures); + vk_format_features2_to_features(format_prop2.formatFeatures); format_prop->samplerYcbcrConversionComponents = format_prop2.samplerYcbcrConversionComponents; format_prop->suggestedYcbcrModel = format_prop2.suggestedYcbcrModel; @@ -309,81 +300,21 @@ anv_GetAndroidHardwareBufferPropertiesANDROID( return VK_SUCCESS; } -VkResult -anv_GetMemoryAndroidHardwareBufferANDROID( - VkDevice device_h, - const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo, - struct AHardwareBuffer **pBuffer) -{ - ANV_FROM_HANDLE(anv_device_memory, mem, pInfo->memory); - - /* Some quotes from Vulkan spec: - * - * "If the device memory was created by importing an Android hardware - * buffer, vkGetMemoryAndroidHardwareBufferANDROID must return that same - * Android hardware buffer object." - * - * "VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID must - * have been included in VkExportMemoryAllocateInfo::handleTypes when - * memory was created." - */ - if (mem->ahw) { - *pBuffer = mem->ahw; - /* Increase refcount. 
*/ - AHardwareBuffer_acquire(mem->ahw); - return VK_SUCCESS; - } - - return VK_ERROR_OUT_OF_HOST_MEMORY; -} - -#endif - -/* Construct ahw usage mask from image usage bits, see - * 'AHardwareBuffer Usage Equivalence' in Vulkan spec. - */ -uint64_t -anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create, - const VkImageUsageFlags vk_usage) -{ - uint64_t ahw_usage = 0; -#if ANDROID_API_LEVEL >= 26 - if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT) - ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE; - - if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) - ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE; - - if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) - ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT; - - if (vk_create & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) - ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP; - - if (vk_create & VK_IMAGE_CREATE_PROTECTED_BIT) - ahw_usage |= AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT; - - /* No usage bits set - set at least one GPU usage. */ - if (ahw_usage == 0) - ahw_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE; #endif - return ahw_usage; -} /* * Called from anv_AllocateMemory when import AHardwareBuffer. */ VkResult anv_import_ahw_memory(VkDevice device_h, - struct anv_device_memory *mem, - const VkImportAndroidHardwareBufferInfoANDROID *info) + struct anv_device_memory *mem) { #if ANDROID_API_LEVEL >= 26 ANV_FROM_HANDLE(anv_device, device, device_h); /* Import from AHardwareBuffer to anv_device_memory. */ const native_handle_t *handle = - AHardwareBuffer_getNativeHandle(info->buffer); + AHardwareBuffer_getNativeHandle(mem->vk.ahardware_buffer); /* NOTE - We support buffers with only one handle but do not error on * multiple handle case. Reason is that we want to support YUV formats @@ -399,14 +330,6 @@ anv_import_ahw_memory(VkDevice device_h, &mem->bo); assert(result == VK_SUCCESS); - /* "If the vkAllocateMemory command succeeds, the implementation must - * acquire a reference to the imported hardware buffer, which it must - * release when the device memory object is freed. If the command fails, - * the implementation must not retain a reference." - */ - AHardwareBuffer_acquire(info->buffer); - mem->ahw = info->buffer; - return VK_SUCCESS; #else return VK_ERROR_EXTENSION_NOT_PRESENT; @@ -414,70 +337,6 @@ anv_import_ahw_memory(VkDevice device_h, } VkResult -anv_create_ahw_memory(VkDevice device_h, - struct anv_device_memory *mem, - const VkMemoryAllocateInfo *pAllocateInfo) -{ -#if ANDROID_API_LEVEL >= 26 - const VkMemoryDedicatedAllocateInfo *dedicated_info = - vk_find_struct_const(pAllocateInfo->pNext, - MEMORY_DEDICATED_ALLOCATE_INFO); - - uint32_t w = 0; - uint32_t h = 1; - uint32_t layers = 1; - uint32_t format = 0; - uint64_t usage = 0; - - /* If caller passed dedicated information. 
*/ - if (dedicated_info && dedicated_info->image) { - ANV_FROM_HANDLE(anv_image, image, dedicated_info->image); - w = image->vk.extent.width; - h = image->vk.extent.height; - layers = image->vk.array_layers; - format = android_format_from_vk(image->vk.format); - usage = anv_ahw_usage_from_vk_usage(image->vk.create_flags, image->vk.usage); - } else if (dedicated_info && dedicated_info->buffer) { - ANV_FROM_HANDLE(anv_buffer, buffer, dedicated_info->buffer); - w = buffer->vk.size; - format = AHARDWAREBUFFER_FORMAT_BLOB; - usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | - AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN; - } else { - w = pAllocateInfo->allocationSize; - format = AHARDWAREBUFFER_FORMAT_BLOB; - usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | - AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN; - } - - struct AHardwareBuffer *ahw = NULL; - struct AHardwareBuffer_Desc desc = { - .width = w, - .height = h, - .layers = layers, - .format = format, - .usage = usage, - }; - - if (AHardwareBuffer_allocate(&desc, &ahw) != 0) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - const VkImportAndroidHardwareBufferInfoANDROID import_info = { - .buffer = ahw, - }; - VkResult result = anv_import_ahw_memory(device_h, mem, &import_info); - - /* Release a reference to avoid leak for AHB allocation. */ - AHardwareBuffer_release(ahw); - - return result; -#else - return VK_ERROR_EXTENSION_NOT_PRESENT; -#endif - -} - -VkResult anv_image_init_from_gralloc(struct anv_device *device, struct anv_image *image, const VkImageCreateInfo *base_info, @@ -536,6 +395,8 @@ anv_image_init_from_gralloc(struct anv_device *device, base_info->tiling); assert(format != ISL_FORMAT_UNSUPPORTED); + anv_info.stride = gralloc_info->stride * (isl_format_get_layout(format)->bpb / 8); + result = anv_image_init(device, image, &anv_info); if (result != VK_SUCCESS) goto fail_init; @@ -548,8 +409,8 @@ anv_image_init_from_gralloc(struct anv_device *device, &mem_reqs); VkDeviceSize aligned_image_size = - align_u64(mem_reqs.memoryRequirements.size, - mem_reqs.memoryRequirements.alignment); + align64(mem_reqs.memoryRequirements.size, + mem_reqs.memoryRequirements.alignment); if (bo->size < aligned_image_size) { result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE, diff --git a/lib/mesa/src/intel/vulkan/anv_android.h b/lib/mesa/src/intel/vulkan/anv_android.h index 4490d3b24..e1f099e1f 100644 --- a/lib/mesa/src/intel/vulkan/anv_android.h +++ b/lib/mesa/src/intel/vulkan/anv_android.h @@ -44,14 +44,12 @@ VkResult anv_image_bind_from_gralloc(struct anv_device *device, struct anv_image *image, const VkNativeBufferANDROID *gralloc_info); -uint64_t anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create, - const VkImageUsageFlags vk_usage); +unsigned anv_ahb_format_for_vk_format(VkFormat vk_format); VkResult anv_import_ahw_memory(VkDevice device_h, - struct anv_device_memory *mem, - const VkImportAndroidHardwareBufferInfoANDROID *info); + struct anv_device_memory *mem); VkResult anv_create_ahw_memory(VkDevice device_h, struct anv_device_memory *mem, - const VkMemoryAllocateInfo *pAllocateInfo); + const VkMemoryDedicatedAllocateInfo *dedicated_info); #endif /* ANV_ANDROID_H */ diff --git a/lib/mesa/src/intel/vulkan/anv_android_stubs.c b/lib/mesa/src/intel/vulkan/anv_android_stubs.c index d5bc11949..4e8c05f57 100644 --- a/lib/mesa/src/intel/vulkan/anv_android_stubs.c +++ b/lib/mesa/src/intel/vulkan/anv_android_stubs.c @@ -39,17 +39,9 @@ VkResult anv_image_bind_from_gralloc(struct anv_device *device, return VK_ERROR_EXTENSION_NOT_PRESENT; } -uint64_t 
-anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create, - const VkImageUsageFlags vk_usage) -{ - return 0; -} - VkResult anv_import_ahw_memory(VkDevice device_h, - struct anv_device_memory *mem, - const VkImportAndroidHardwareBufferInfoANDROID *info) + struct anv_device_memory *mem) { return VK_ERROR_EXTENSION_NOT_PRESENT; } @@ -57,7 +49,7 @@ anv_import_ahw_memory(VkDevice device_h, VkResult anv_create_ahw_memory(VkDevice device_h, struct anv_device_memory *mem, - const VkMemoryAllocateInfo *pAllocateInfo) + const VkMemoryDedicatedAllocateInfo *dedicated_info) { return VK_ERROR_EXTENSION_NOT_PRESENT; } diff --git a/lib/mesa/src/intel/vulkan/anv_bo_sync.c b/lib/mesa/src/intel/vulkan/anv_bo_sync.c index 149ae2c2b..c48d52d28 100644 --- a/lib/mesa/src/intel/vulkan/anv_bo_sync.c +++ b/lib/mesa/src/intel/vulkan/anv_bo_sync.c @@ -24,6 +24,7 @@ #include "anv_private.h" #include "util/os_time.h" +#include "util/perf/cpu_trace.h" static struct anv_bo_sync * to_anv_bo_sync(struct vk_sync *sync) @@ -105,6 +106,7 @@ anv_bo_sync_wait(struct vk_device *vk_device, { struct anv_device *device = container_of(vk_device, struct anv_device, vk); VkResult result; + MESA_TRACE_FUNC(); uint32_t pending = wait_count; while (pending) { diff --git a/lib/mesa/src/intel/vulkan/anv_generated_indirect_draws.c b/lib/mesa/src/intel/vulkan/anv_generated_indirect_draws.c new file mode 100644 index 000000000..003dbc88c --- /dev/null +++ b/lib/mesa/src/intel/vulkan/anv_generated_indirect_draws.c @@ -0,0 +1,352 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +#include "compiler/brw_compiler.h" +#include "compiler/brw_nir.h" +#include "compiler/spirv/nir_spirv.h" +#include "dev/intel_debug.h" +#include "util/macros.h" + +#include "anv_generated_indirect_draws.h" + +#include "shaders/gfx9_generated_draws_spv.h" +#include "shaders/gfx11_generated_draws_spv.h" + +/* This pass takes vulkan descriptor bindings 0 & 1 and turns them into global + * 64bit addresses. Binding 2 is left UBO that would normally be accessed + * through the binding table but it fully promoted to push constants. + * + * As a result we're not using the binding table at all which is nice because + * of the side command buffer we use for the generating shader does not + * interact with the binding table allocation. 
+ */ +static bool +lower_vulkan_descriptors_instr(nir_builder *b, nir_instr *instr, void *cb_data) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor) + return false; + + nir_instr *res_index_instr = intrin->src[0].ssa->parent_instr; + assert(res_index_instr->type == nir_instr_type_intrinsic); + nir_intrinsic_instr *res_index_intrin = + nir_instr_as_intrinsic(res_index_instr); + assert(res_index_intrin->intrinsic == nir_intrinsic_vulkan_resource_index); + + b->cursor = nir_after_instr(instr); + + nir_ssa_def *desc_value = NULL; + switch (nir_intrinsic_binding(res_index_intrin)) { + case 0: { + desc_value = + nir_load_ubo(b, 1, 64, + nir_imm_int(b, 2), + nir_imm_int(b, + offsetof(struct anv_generated_indirect_params, + indirect_data_addr)), + .align_mul = 8, + .align_offset = 0, + .range_base = 0, + .range = ~0); + desc_value = + nir_vec4(b, + nir_unpack_64_2x32_split_x(b, desc_value), + nir_unpack_64_2x32_split_y(b, desc_value), + nir_imm_int(b, 0), + nir_imm_int(b, 0)); + break; + } + + case 1: { + desc_value = + nir_load_ubo(b, 1, 64, + nir_imm_int(b, 2), + nir_imm_int(b, + offsetof(struct anv_generated_indirect_params, + generated_cmds_addr)), + .align_mul = 8, + .align_offset = 0, + .range_base = 0, + .range = ~0); + desc_value = + nir_vec4(b, + nir_unpack_64_2x32_split_x(b, desc_value), + nir_unpack_64_2x32_split_y(b, desc_value), + nir_imm_int(b, 0), + nir_imm_int(b, 0)); + break; + } + + case 2: { + desc_value = + nir_load_ubo(b, 1, 64, + nir_imm_int(b, 2), + nir_imm_int(b, + offsetof(struct anv_generated_indirect_params, + draw_ids_addr)), + .align_mul = 8, + .align_offset = 0, + .range_base = 0, + .range = ~0); + desc_value = + nir_vec4(b, + nir_unpack_64_2x32_split_x(b, desc_value), + nir_unpack_64_2x32_split_y(b, desc_value), + nir_imm_int(b, 0), + nir_imm_int(b, 0)); + break; + } + + case 3: + desc_value = + nir_vec2(b, + nir_imm_int(b, 2), + nir_imm_int(b, 0)); + break; + } + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc_value); + + return true; +} + +static bool +lower_vulkan_descriptors(nir_shader *shader) +{ + return nir_shader_instructions_pass(shader, + lower_vulkan_descriptors_instr, + nir_metadata_block_index | + nir_metadata_dominance, + NULL); +} + +static struct anv_shader_bin * +compile_upload_spirv(struct anv_device *device, + const void *key, + uint32_t key_size, + const uint32_t *spirv_source, + uint32_t spirv_source_size, + uint32_t sends_count_expectation) +{ + struct spirv_to_nir_options spirv_options = { + .caps = { + .int64 = true, + }, + .ubo_addr_format = nir_address_format_32bit_index_offset, + .ssbo_addr_format = nir_address_format_64bit_global_32bit_offset, + .environment = NIR_SPIRV_VULKAN, + .create_library = false, + }; + const nir_shader_compiler_options *nir_options = + device->physical->compiler->nir_options[MESA_SHADER_FRAGMENT]; + + nir_shader* nir = + spirv_to_nir(spirv_source, spirv_source_size, + NULL, 0, MESA_SHADER_FRAGMENT, "main", + &spirv_options, nir_options); + + assert(nir != NULL); + + nir->info.internal = true; + + nir_validate_shader(nir, "after spirv_to_nir"); + nir_validate_ssa_dominance(nir, "after spirv_to_nir"); + + NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); + NIR_PASS_V(nir, nir_lower_returns); + NIR_PASS_V(nir, nir_inline_functions); + NIR_PASS_V(nir, nir_opt_deref); + + /* Pick off the single entrypoint that we want */ + 
nir_remove_non_entrypoints(nir); + + NIR_PASS_V(nir, nir_lower_vars_to_ssa); + NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_dce); + NIR_PASS_V(nir, nir_opt_cse); + NIR_PASS_V(nir, nir_opt_gcm, true); + NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false); + NIR_PASS_V(nir, nir_opt_dce); + + NIR_PASS_V(nir, nir_lower_variable_initializers, ~0); + + NIR_PASS_V(nir, nir_split_var_copies); + NIR_PASS_V(nir, nir_split_per_member_structs); + + struct brw_compiler *compiler = device->physical->compiler; + struct brw_nir_compiler_opts opts = {}; + brw_preprocess_nir(compiler, nir, &opts); + + NIR_PASS_V(nir, nir_propagate_invariant, false); + + NIR_PASS_V(nir, nir_lower_input_attachments, + &(nir_input_attachment_options) { + .use_fragcoord_sysval = true, + .use_layer_id_sysval = true, + }); + + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + + /* Do vectorizing here. For some reason when trying to do it in the back + * this just isn't working. + */ + nir_load_store_vectorize_options options = { + .modes = nir_var_mem_ubo | nir_var_mem_ssbo, + .callback = brw_nir_should_vectorize_mem, + .robust_modes = (nir_variable_mode)0, + }; + NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options); + + NIR_PASS_V(nir, lower_vulkan_descriptors); + NIR_PASS_V(nir, nir_opt_dce); + + NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo, + nir_address_format_32bit_index_offset); + NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo, + nir_address_format_64bit_global_32bit_offset); + + NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_constant_folding); + NIR_PASS_V(nir, nir_opt_dce); + + struct brw_wm_prog_key wm_key; + memset(&wm_key, 0, sizeof(wm_key)); + + struct brw_wm_prog_data wm_prog_data = { + .base.nr_params = nir->num_uniforms / 4, + }; + + brw_nir_analyze_ubo_ranges(compiler, nir, NULL, wm_prog_data.base.ubo_ranges); + + struct brw_compile_stats stats[3]; + struct brw_compile_fs_params params = { + .nir = nir, + .key = &wm_key, + .prog_data = &wm_prog_data, + .stats = stats, + .log_data = device, + .debug_flag = DEBUG_WM, + }; + const unsigned *program = brw_compile_fs(compiler, nir, ¶ms); + + unsigned stat_idx = 0; + if (wm_prog_data.dispatch_8) { + assert(stats[stat_idx].spills == 0); + assert(stats[stat_idx].fills == 0); + assert(stats[stat_idx].sends == sends_count_expectation); + stat_idx++; + } + if (wm_prog_data.dispatch_16) { + assert(stats[stat_idx].spills == 0); + assert(stats[stat_idx].fills == 0); + assert(stats[stat_idx].sends == sends_count_expectation); + stat_idx++; + } + if (wm_prog_data.dispatch_32) { + assert(stats[stat_idx].spills == 0); + assert(stats[stat_idx].fills == 0); + assert(stats[stat_idx].sends == sends_count_expectation * 2); + stat_idx++; + } + + struct anv_pipeline_bind_map bind_map; + memset(&bind_map, 0, sizeof(bind_map)); + + struct anv_push_descriptor_info push_desc_info = {}; + + struct anv_shader_bin *kernel = + anv_device_upload_kernel(device, + device->internal_cache, + nir->info.stage, + key, key_size, program, + wm_prog_data.base.program_size, + &wm_prog_data.base, sizeof(wm_prog_data), + NULL, 0, NULL, &bind_map, + &push_desc_info); + + ralloc_free(nir); + + return kernel; +} + +VkResult +anv_device_init_generated_indirect_draws(struct anv_device *device) +{ + const struct intel_l3_weights w = + intel_get_default_l3_weights(device->info, + true /* wants_dc_cache */, + false /* needs_slm */); + device->generated_draw_l3_config = intel_get_l3_config(device->info, w); + + struct { + char name[40]; + } 
indirect_draws_key = { + .name = "anv-generated-indirect-draws", + }; + + device->generated_draw_kernel = + anv_device_search_for_kernel(device, + device->internal_cache, + &indirect_draws_key, + sizeof(indirect_draws_key), + NULL); + if (device->generated_draw_kernel == NULL) { + const uint32_t *spirv_source = + device->info->ver >= 11 ? + gfx11_generated_draws_spv_source : + gfx9_generated_draws_spv_source; + const uint32_t spirv_source_size = + device->info->ver >= 11 ? + ARRAY_SIZE(gfx11_generated_draws_spv_source) : + ARRAY_SIZE(gfx9_generated_draws_spv_source); + const uint32_t send_count = + device->info->ver >= 11 ? + 11 /* 2 * (2 loads + 3 stores) + 1 store */ : + 17 /* 2 * (2 loads + 6 stores) + 1 store */; + + device->generated_draw_kernel = + compile_upload_spirv(device, + &indirect_draws_key, + sizeof(indirect_draws_key), + spirv_source, spirv_source_size, send_count); + } + if (device->generated_draw_kernel == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + /* The cache already has a reference and it's not going anywhere so there + * is no need to hold a second reference. + */ + anv_shader_bin_unref(device, device->generated_draw_kernel); + + return VK_SUCCESS; +} + +void +anv_device_finish_generated_indirect_draws(struct anv_device *device) +{ +} diff --git a/lib/mesa/src/intel/vulkan/anv_generated_indirect_draws.h b/lib/mesa/src/intel/vulkan/anv_generated_indirect_draws.h new file mode 100644 index 000000000..e8ab8553a --- /dev/null +++ b/lib/mesa/src/intel/vulkan/anv_generated_indirect_draws.h @@ -0,0 +1,87 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef ANV_GENERATED_INDIRECT_DRAWS_H +#define ANV_GENERATED_INDIRECT_DRAWS_H + +#include <stdint.h> + +#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0) +#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1) +#define ANV_GENERATED_FLAG_DRAWID BITFIELD_BIT(2) +#define ANV_GENERATED_FLAG_BASE BITFIELD_BIT(3) + +/* This needs to match common_generated_draws.glsl : + * + * layout(set = 0, binding = 2) uniform block + */ +struct anv_generated_indirect_draw_params { + /* Draw ID buffer address (only used on Gfx9) */ + uint64_t draw_id_addr; + /* Indirect data buffer address (only used on Gfx9) */ + uint64_t indirect_data_addr; + /* Stride between each elements of the indirect data buffer */ + uint32_t indirect_data_stride; + uint32_t flags; /* 0-7: bits, 8-15: mocs, 16-23: cmd_dws */ + /* Base number of the draw ID, it is added to the index computed from the + * gl_FragCoord + */ + uint32_t draw_base; + + /* Number of draws to generate */ + uint32_t draw_count; + + /* Maximum number of draws (equals to draw_count for indirect draws without + * an indirect count) + */ + uint32_t max_draw_count; + + /* Instance multiplier for multi view */ + uint32_t instance_multiplier; + + /* Address where to jump at after the generated draw (only used with + * indirect draw count variants) + */ + uint64_t end_addr; +}; + +struct anv_generated_indirect_params { + struct anv_generated_indirect_draw_params draw; + + /* Global address of binding 0 */ + uint64_t indirect_data_addr; + + /* Global address of binding 1 */ + uint64_t generated_cmds_addr; + + /* Global address of binding 2 */ + uint64_t draw_ids_addr; + + /* CPU side pointer to the previous item when number of draws has to be + * split into smaller chunks, see while loop in + * genX(cmd_buffer_emit_indirect_generated_draws) + */ + struct anv_generated_indirect_params *prev; +}; + +#endif /* ANV_GENERATED_INDIRECT_DRAWS_H */ diff --git a/lib/mesa/src/intel/vulkan/anv_kmd_backend.c b/lib/mesa/src/intel/vulkan/anv_kmd_backend.c new file mode 100644 index 000000000..8ce882bba --- /dev/null +++ b/lib/mesa/src/intel/vulkan/anv_kmd_backend.c @@ -0,0 +1,42 @@ +/* + * Copyright © 2023 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <stdlib.h> + +#include "anv_kmd_backend.h" +#include "anv_private.h" + +const struct anv_kmd_backend * +anv_kmd_backend_get(enum intel_kmd_type type) +{ + switch (type) { + case INTEL_KMD_TYPE_I915: + return anv_i915_kmd_backend_get(); + case INTEL_KMD_TYPE_XE: + return anv_xe_kmd_backend_get(); + case INTEL_KMD_TYPE_STUB: + return anv_stub_kmd_backend_get(); + default: + return NULL; + } +} diff --git a/lib/mesa/src/intel/vulkan/anv_kmd_backend.h b/lib/mesa/src/intel/vulkan/anv_kmd_backend.h new file mode 100644 index 000000000..76c5f2f27 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/anv_kmd_backend.h @@ -0,0 +1,80 @@ +/* + * Copyright © 2023 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include <stdint.h> + +#include "vulkan/vulkan_core.h" +#include "vk_sync.h" + +#include "dev/intel_device_info.h" +#include "dev/intel_kmd.h" + +struct anv_bo; +enum anv_bo_alloc_flags; +struct anv_cmd_buffer; +struct anv_device; +struct anv_queue; +struct anv_query_pool; +struct anv_utrace_submit; + +struct anv_kmd_backend { + /* + * Create a gem buffer. + * Return the gem handle in case of success otherwise returns 0. 
+ */ + uint32_t (*gem_create)(struct anv_device *device, + const struct intel_memory_class_instance **regions, + uint16_t num_regions, uint64_t size, + enum anv_bo_alloc_flags alloc_flags, + uint64_t *actual_size); + void (*gem_close)(struct anv_device *device, uint32_t handle); + /* Returns MAP_FAILED on error */ + void *(*gem_mmap)(struct anv_device *device, struct anv_bo *bo, + uint64_t offset, uint64_t size, + VkMemoryPropertyFlags property_flags); + int (*gem_vm_bind)(struct anv_device *device, struct anv_bo *bo); + int (*gem_vm_unbind)(struct anv_device *device, struct anv_bo *bo); + VkResult (*execute_simple_batch)(struct anv_queue *queue, + struct anv_bo *batch_bo, + uint32_t batch_bo_size); + VkResult (*queue_exec_locked)(struct anv_queue *queue, + uint32_t wait_count, + const struct vk_sync_wait *waits, + uint32_t cmd_buffer_count, + struct anv_cmd_buffer **cmd_buffers, + uint32_t signal_count, + const struct vk_sync_signal *signals, + struct anv_query_pool *perf_query_pool, + uint32_t perf_query_pass); + VkResult (*queue_exec_trace)(struct anv_queue *queue, + struct anv_utrace_submit *submit); +}; + +const struct anv_kmd_backend *anv_kmd_backend_get(enum intel_kmd_type type); + +/* Internal functions, should only be called by anv_kmd_backend_get() */ +const struct anv_kmd_backend *anv_i915_kmd_backend_get(void); +const struct anv_kmd_backend *anv_xe_kmd_backend_get(void); +const struct anv_kmd_backend *anv_stub_kmd_backend_get(void); diff --git a/lib/mesa/src/intel/vulkan/anv_mesh_perprim_wa.c b/lib/mesa/src/intel/vulkan/anv_mesh_perprim_wa.c new file mode 100644 index 000000000..f7346b6dc --- /dev/null +++ b/lib/mesa/src/intel/vulkan/anv_mesh_perprim_wa.c @@ -0,0 +1,557 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" +#include "nir_builder.h" + +/* + * Wa_14015590813 for gfx 12.5. + * + * This file implements workaround for HW bug, which leads to fragment shader + * reading incorrect per-primitive data if mesh shader, in addition to writing + * per-primitive data, also writes to gl_ClipDistance. 
+ * + * The suggested solution to that bug is to not use per-primitive data by: + * - creating new vertices for provoking vertices shared by multiple primitives + * - converting per-primitive attributes read by fragment shader to flat + * per-vertex attributes for the provoking vertex + * - modifying fragment shader to read those per-vertex attributes + * + * There are at least 2 type of failures not handled very well: + * - if the number of varying slots overflows, than only some attributes will + * be converted, leading to corruption of those unconverted attributes + * - if the overall MUE size is so large it doesn't fit in URB, then URB + * allocation will fail in some way; unfortunately there's no good way to + * say how big MUE will be at this moment and back out + * + * This workaround needs to be applied before linking, so that unused outputs + * created by this code are removed at link time. + * + * This workaround can be controlled by a driconf option to either disable it, + * lower its scope or force enable it. + * + * Option "anv_mesh_conv_prim_attrs_to_vert_attrs" is evaluated like this: + * value == 0 - disable workaround + * value < 0 - enable ONLY if workaround is required + * value > 0 - enable ALWAYS, even if it's not required + * abs(value) >= 1 - attribute conversion + * abs(value) >= 2 - attribute conversion and vertex duplication + * + * Default: -2 (both parts of the work around, ONLY if it's required) + * + */ + +static bool +anv_mesh_convert_attrs_prim_to_vert(struct nir_shader *nir, + gl_varying_slot *wa_mapping, + uint64_t fs_inputs, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + void *mem_ctx, + const bool dup_vertices, + const bool force_conversion) +{ + uint64_t per_primitive_outputs = nir->info.per_primitive_outputs; + per_primitive_outputs &= ~BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES); + + if (per_primitive_outputs == 0) + return false; + + uint64_t outputs_written = nir->info.outputs_written; + uint64_t other_outputs = outputs_written & ~per_primitive_outputs; + + if ((other_outputs & (VARYING_BIT_CLIP_DIST0 | VARYING_BIT_CLIP_DIST1)) == 0) + if (!force_conversion) + return false; + + uint64_t all_outputs = outputs_written; + unsigned attrs = 0; + + uint64_t remapped_outputs = outputs_written & per_primitive_outputs; + remapped_outputs &= ~BITFIELD64_BIT(VARYING_SLOT_CULL_PRIMITIVE); + + /* Skip locations not read by the fragment shader, because they will + * be eliminated at linking time. Note that some fs inputs may be + * removed only after optimizations, so it's possible that we will + * create too many variables. + */ + remapped_outputs &= fs_inputs; + + /* Figure out the mapping between per-primitive and new per-vertex outputs. */ + nir_foreach_shader_out_variable(var, nir) { + int location = var->data.location; + + if (!(BITFIELD64_BIT(location) & remapped_outputs)) + continue; + + /* Although primitive shading rate, layer and viewport have predefined + * place in MUE Primitive Header (so we can't really move them anywhere), + * we have to copy them to per-vertex space if fragment shader reads them. 
+ */ + assert(location == VARYING_SLOT_PRIMITIVE_SHADING_RATE || + location == VARYING_SLOT_LAYER || + location == VARYING_SLOT_VIEWPORT || + location == VARYING_SLOT_PRIMITIVE_ID || + location >= VARYING_SLOT_VAR0); + + const struct glsl_type *type = var->type; + if (nir_is_arrayed_io(var, MESA_SHADER_MESH) || var->data.per_view) { + assert(glsl_type_is_array(type)); + type = glsl_get_array_element(type); + } + + unsigned num_slots = glsl_count_attribute_slots(type, false); + + for (gl_varying_slot slot = VARYING_SLOT_VAR0; slot <= VARYING_SLOT_VAR31; slot++) { + uint64_t mask = BITFIELD64_MASK(num_slots) << slot; + if ((all_outputs & mask) == 0) { + wa_mapping[location] = slot; + all_outputs |= mask; + attrs++; + break; + } + } + + if (wa_mapping[location] == 0) { + fprintf(stderr, "Not enough space for hardware per-primitive data corruption work around.\n"); + break; + } + } + + if (attrs == 0) + if (!force_conversion) + return false; + + unsigned provoking_vertex = 0; + + const VkPipelineRasterizationStateCreateInfo *rs_info = pCreateInfo->pRasterizationState; + const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *rs_pv_info = + vk_find_struct_const(rs_info, PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT); + if (rs_pv_info && rs_pv_info->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT) + provoking_vertex = 2; + + unsigned vertices_per_primitive = + num_mesh_vertices_per_primitive(nir->info.mesh.primitive_type); + + nir_function_impl *impl = nir_shader_get_entrypoint(nir); + nir_builder b; + nir_builder_init(&b, impl); + b.cursor = nir_after_cf_list(&impl->body); + + /* wait for all subgroups to finish */ + nir_scoped_barrier(&b, NIR_SCOPE_WORKGROUP); + + nir_ssa_def *zero = nir_imm_int(&b, 0); + + nir_ssa_def *local_invocation_index = nir_build_load_local_invocation_index(&b); + + nir_ssa_def *cmp = nir_ieq(&b, local_invocation_index, zero); + nir_if *if_stmt = nir_push_if(&b, cmp); + { + nir_variable *primitive_count_var = NULL; + nir_variable *primitive_indices_var = NULL; + + unsigned num_other_variables = 0; + nir_foreach_shader_out_variable(var, b.shader) { + if ((BITFIELD64_BIT(var->data.location) & other_outputs) == 0) + continue; + num_other_variables++; + } + + nir_deref_instr **per_vertex_derefs = + ralloc_array(mem_ctx, nir_deref_instr *, num_other_variables); + + unsigned num_per_vertex_variables = 0; + + unsigned processed = 0; + nir_foreach_shader_out_variable(var, b.shader) { + if ((BITFIELD64_BIT(var->data.location) & other_outputs) == 0) + continue; + + switch (var->data.location) { + case VARYING_SLOT_PRIMITIVE_COUNT: + primitive_count_var = var; + break; + case VARYING_SLOT_PRIMITIVE_INDICES: + primitive_indices_var = var; + break; + default: { + const struct glsl_type *type = var->type; + assert(glsl_type_is_array(type)); + const struct glsl_type *array_element_type = + glsl_get_array_element(type); + + if (dup_vertices) { + /* + * Resize type of array output to make space for one extra + * vertex attribute for each primitive, so we ensure that + * the provoking vertex is not shared between primitives. 
+ */ + const struct glsl_type *new_type = + glsl_array_type(array_element_type, + glsl_get_length(type) + + nir->info.mesh.max_primitives_out, + 0); + + var->type = new_type; + } + + per_vertex_derefs[num_per_vertex_variables++] = + nir_build_deref_var(&b, var); + break; + } + } + + ++processed; + } + assert(processed == num_other_variables); + + assert(primitive_count_var != NULL); + assert(primitive_indices_var != NULL); + + /* Update types of derefs to match type of variables they (de)reference. */ + if (dup_vertices) { + nir_foreach_function(function, b.shader) { + if (!function->impl) + continue; + + nir_foreach_block(block, function->impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_deref) + continue; + + nir_deref_instr *deref = nir_instr_as_deref(instr); + if (deref->deref_type != nir_deref_type_var) + continue; + + if (deref->var->type != deref->type) + deref->type = deref->var->type; + } + } + } + } + + /* indexed by slot of per-prim attribute */ + struct { + nir_deref_instr *per_prim_deref; + nir_deref_instr *per_vert_deref; + } mapping[VARYING_SLOT_MAX] = {{NULL, NULL}, }; + + /* Create new per-vertex output variables mirroring per-primitive variables + * and create derefs for both old and new variables. + */ + nir_foreach_shader_out_variable(var, b.shader) { + gl_varying_slot location = var->data.location; + + if ((BITFIELD64_BIT(location) & (outputs_written & per_primitive_outputs)) == 0) + continue; + if (wa_mapping[location] == 0) + continue; + + const struct glsl_type *type = var->type; + assert(glsl_type_is_array(type)); + const struct glsl_type *array_element_type = glsl_get_array_element(type); + + const struct glsl_type *new_type = + glsl_array_type(array_element_type, + nir->info.mesh.max_vertices_out + + (dup_vertices ? 
nir->info.mesh.max_primitives_out : 0), + 0); + + nir_variable *new_var = + nir_variable_create(b.shader, nir_var_shader_out, new_type, var->name); + assert(wa_mapping[location] >= VARYING_SLOT_VAR0); + assert(wa_mapping[location] <= VARYING_SLOT_VAR31); + new_var->data.location = wa_mapping[location]; + new_var->data.interpolation = INTERP_MODE_FLAT; + + mapping[location].per_vert_deref = nir_build_deref_var(&b, new_var); + mapping[location].per_prim_deref = nir_build_deref_var(&b, var); + } + + nir_ssa_def *trueconst = nir_imm_true(&b); + + /* + * for each Primitive (0 : primitiveCount) + * if VertexUsed[PrimitiveIndices[Primitive][provoking vertex]] + * create 1 new vertex at offset "Vertex" + * copy per vert attributes of provoking vertex to the new one + * update PrimitiveIndices[Primitive][provoking vertex] + * Vertex++ + * else + * VertexUsed[PrimitiveIndices[Primitive][provoking vertex]] := true + * + * for each attribute : mapping + * copy per_prim_attr(Primitive) to per_vert_attr[Primitive][provoking vertex] + */ + + /* primitive count */ + nir_ssa_def *primitive_count = nir_load_var(&b, primitive_count_var); + + /* primitive index */ + nir_variable *primitive_var = + nir_local_variable_create(impl, glsl_uint_type(), "Primitive"); + nir_deref_instr *primitive_deref = nir_build_deref_var(&b, primitive_var); + nir_store_deref(&b, primitive_deref, zero, 1); + + /* vertex index */ + nir_variable *vertex_var = + nir_local_variable_create(impl, glsl_uint_type(), "Vertex"); + nir_deref_instr *vertex_deref = nir_build_deref_var(&b, vertex_var); + nir_store_deref(&b, vertex_deref, nir_imm_int(&b, nir->info.mesh.max_vertices_out), 1); + + /* used vertices bitvector */ + const struct glsl_type *used_vertex_type = + glsl_array_type(glsl_bool_type(), + nir->info.mesh.max_vertices_out, + 0); + nir_variable *used_vertex_var = + nir_local_variable_create(impl, used_vertex_type, "VertexUsed"); + nir_deref_instr *used_vertex_deref = + nir_build_deref_var(&b, used_vertex_var); + /* Initialize it as "not used" */ + for (unsigned i = 0; i < nir->info.mesh.max_vertices_out; ++i) { + nir_deref_instr *indexed_used_vertex_deref = + nir_build_deref_array(&b, used_vertex_deref, nir_imm_int(&b, i)); + nir_store_deref(&b, indexed_used_vertex_deref, nir_imm_false(&b), 1); + } + + nir_loop *loop = nir_push_loop(&b); + { + nir_ssa_def *primitive = nir_load_deref(&b, primitive_deref); + nir_ssa_def *cmp = nir_ige(&b, primitive, primitive_count); + + nir_if *loop_check = nir_push_if(&b, cmp); + nir_jump(&b, nir_jump_break); + nir_pop_if(&b, loop_check); + + nir_deref_instr *primitive_indices_deref = + nir_build_deref_var(&b, primitive_indices_var); + nir_deref_instr *indexed_primitive_indices_deref; + nir_ssa_def *src_vertex; + nir_ssa_def *prim_indices; + + if (nir->info.mesh.nv) { + /* flat array, but we can deref each index directly */ + nir_ssa_def *index_index = + nir_imul(&b, primitive, nir_imm_int(&b, vertices_per_primitive)); + index_index = nir_iadd(&b, index_index, nir_imm_int(&b, provoking_vertex)); + indexed_primitive_indices_deref = nir_build_deref_array(&b, primitive_indices_deref, index_index); + src_vertex = nir_load_deref(&b, indexed_primitive_indices_deref); + prim_indices = NULL; + } else { + /* array of vectors, we have to extract index out of array deref */ + indexed_primitive_indices_deref = nir_build_deref_array(&b, primitive_indices_deref, primitive); + prim_indices = nir_load_deref(&b, indexed_primitive_indices_deref); + src_vertex = nir_channel(&b, prim_indices, provoking_vertex); + } + 
+ nir_ssa_def *dst_vertex = nir_load_deref(&b, vertex_deref); + + nir_deref_instr *indexed_used_vertex_deref = + nir_build_deref_array(&b, used_vertex_deref, src_vertex); + nir_ssa_def *used_vertex = nir_load_deref(&b, indexed_used_vertex_deref); + if (!dup_vertices) + used_vertex = nir_imm_false(&b); + + nir_if *vertex_used_check = nir_push_if(&b, used_vertex); + { + for (unsigned a = 0; a < num_per_vertex_variables; ++a) { + nir_deref_instr *attr_arr = per_vertex_derefs[a]; + nir_deref_instr *src = nir_build_deref_array(&b, attr_arr, src_vertex); + nir_deref_instr *dst = nir_build_deref_array(&b, attr_arr, dst_vertex); + + nir_copy_deref(&b, dst, src); + } + + if (nir->info.mesh.nv) { + nir_store_deref(&b, indexed_primitive_indices_deref, dst_vertex, 1); + } else { + /* replace one component of primitive indices vector */ + nir_ssa_def *new_val = + nir_vector_insert_imm(&b, prim_indices, dst_vertex, provoking_vertex); + + /* and store complete vector */ + nir_store_deref(&b, indexed_primitive_indices_deref, new_val, + BITFIELD_MASK(vertices_per_primitive)); + } + + nir_store_deref(&b, vertex_deref, nir_iadd_imm(&b, dst_vertex, 1), 1); + + for (unsigned i = 0; i < ARRAY_SIZE(mapping); ++i) { + if (!mapping[i].per_vert_deref) + continue; + + nir_deref_instr *src = + nir_build_deref_array(&b, mapping[i].per_prim_deref, primitive); + nir_deref_instr *dst = + nir_build_deref_array(&b, mapping[i].per_vert_deref, dst_vertex); + + nir_copy_deref(&b, dst, src); + } + } + nir_push_else(&b, vertex_used_check); + { + nir_store_deref(&b, indexed_used_vertex_deref, trueconst, 1); + + for (unsigned i = 0; i < ARRAY_SIZE(mapping); ++i) { + if (!mapping[i].per_vert_deref) + continue; + + nir_deref_instr *src = + nir_build_deref_array(&b, mapping[i].per_prim_deref, primitive); + nir_deref_instr *dst = + nir_build_deref_array(&b, mapping[i].per_vert_deref, src_vertex); + + nir_copy_deref(&b, dst, src); + } + + } + nir_pop_if(&b, vertex_used_check); + + nir_store_deref(&b, primitive_deref, nir_iadd_imm(&b, primitive, 1), 1); + } + nir_pop_loop(&b, loop); + } + nir_pop_if(&b, if_stmt); /* local_invocation_index == 0 */ + + if (dup_vertices) + nir->info.mesh.max_vertices_out += nir->info.mesh.max_primitives_out; + + if (should_print_nir(nir)) { + printf("%s\n", __func__); + nir_print_shader(nir, stdout); + } + + /* deal with copy_derefs */ + NIR_PASS(_, nir, nir_split_var_copies); + NIR_PASS(_, nir, nir_lower_var_copies); + + nir_shader_gather_info(nir, impl); + + return true; +} + +static bool +anv_frag_update_derefs_instr(struct nir_builder *b, nir_instr *instr, void *data) +{ + if (instr->type != nir_instr_type_deref) + return false; + + nir_deref_instr *deref = nir_instr_as_deref(instr); + if (deref->deref_type != nir_deref_type_var) + return false; + + nir_variable *var = deref->var; + if (!(var->data.mode & nir_var_shader_in)) + return false; + + int location = var->data.location; + nir_deref_instr **new_derefs = (nir_deref_instr **)data; + if (new_derefs[location] == NULL) + return false; + + assert(deref->dest.is_ssa); + assert(new_derefs[location]->dest.is_ssa); + + nir_instr_remove(&deref->instr); + nir_ssa_def_rewrite_uses(&deref->dest.ssa, &new_derefs[location]->dest.ssa); + + return true; +} + +static bool +anv_frag_update_derefs(nir_shader *shader, nir_deref_instr **mapping) +{ + return nir_shader_instructions_pass(shader, anv_frag_update_derefs_instr, + nir_metadata_none, (void *)mapping); +} + +/* Update fragment shader inputs with new ones. 
*/ +static void +anv_frag_convert_attrs_prim_to_vert(struct nir_shader *nir, + gl_varying_slot *wa_mapping) +{ + /* indexed by slot of per-prim attribute */ + nir_deref_instr *new_derefs[VARYING_SLOT_MAX] = {NULL, }; + + nir_function_impl *impl = nir_shader_get_entrypoint(nir); + nir_builder b; + nir_builder_init(&b, impl); + b.cursor = nir_before_cf_list(&impl->body); + + nir_foreach_shader_in_variable_safe(var, nir) { + gl_varying_slot location = var->data.location; + gl_varying_slot new_location = wa_mapping[location]; + if (new_location == 0) + continue; + + assert(wa_mapping[new_location] == 0); + + nir_variable *new_var = + nir_variable_create(b.shader, nir_var_shader_in, var->type, var->name); + new_var->data.location = new_location; + new_var->data.location_frac = var->data.location_frac; + new_var->data.interpolation = INTERP_MODE_FLAT; + + new_derefs[location] = nir_build_deref_var(&b, new_var); + } + + NIR_PASS(_, nir, anv_frag_update_derefs, new_derefs); + + nir_shader_gather_info(nir, impl); +} + +void +anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir, + struct nir_shader *fs_nir, + struct anv_device *device, + const VkGraphicsPipelineCreateInfo *info) +{ + const struct intel_device_info *devinfo = device->info; + + int mesh_conv_prim_attrs_to_vert_attrs = + device->physical->instance->mesh_conv_prim_attrs_to_vert_attrs; + if (mesh_conv_prim_attrs_to_vert_attrs < 0 && + !intel_needs_workaround(devinfo, 14015590813)) + mesh_conv_prim_attrs_to_vert_attrs = 0; + + if (mesh_conv_prim_attrs_to_vert_attrs != 0) { + uint64_t fs_inputs = 0; + nir_foreach_shader_in_variable(var, fs_nir) + fs_inputs |= BITFIELD64_BIT(var->data.location); + + void *stage_ctx = ralloc_context(NULL); + + gl_varying_slot wa_mapping[VARYING_SLOT_MAX] = { 0, }; + + const bool dup_vertices = abs(mesh_conv_prim_attrs_to_vert_attrs) >= 2; + const bool force_conversion = mesh_conv_prim_attrs_to_vert_attrs > 0; + + if (anv_mesh_convert_attrs_prim_to_vert(ms_nir, wa_mapping, + fs_inputs, info, stage_ctx, + dup_vertices, force_conversion)) + anv_frag_convert_attrs_prim_to_vert(fs_nir, wa_mapping); + + ralloc_free(stage_ctx); + } +} diff --git a/lib/mesa/src/intel/vulkan/anv_nir_compute_push_layout.c b/lib/mesa/src/intel/vulkan/anv_nir_compute_push_layout.c index 22478e7e3..1d4b8009e 100644 --- a/lib/mesa/src/intel/vulkan/anv_nir_compute_push_layout.c +++ b/lib/mesa/src/intel/vulkan/anv_nir_compute_push_layout.c @@ -67,12 +67,13 @@ anv_nir_compute_push_layout(nir_shader *nir, break; } - case nir_intrinsic_load_desc_set_address_intel: - push_start = MIN2(push_start, - offsetof(struct anv_push_constants, desc_sets)); - push_end = MAX2(push_end, push_start + + case nir_intrinsic_load_desc_set_address_intel: { + unsigned base = offsetof(struct anv_push_constants, desc_sets); + push_start = MIN2(push_start, base); + push_end = MAX2(push_end, base + sizeof_field(struct anv_push_constants, desc_sets)); break; + } default: break; @@ -117,7 +118,7 @@ anv_nir_compute_push_layout(nir_shader *nir, * push_end (no push constants is indicated by push_start = UINT_MAX). */ push_start = MIN2(push_start, push_end); - push_start = align_down_u32(push_start, 32); + push_start = ROUND_DOWN_TO(push_start, 32); /* For vec4 our push data size needs to be aligned to a vec4 and for * scalar, it needs to be aligned to a DWORD. 
diff --git a/lib/mesa/src/intel/vulkan/anv_nir_lower_ubo_loads.c b/lib/mesa/src/intel/vulkan/anv_nir_lower_ubo_loads.c index 5a170352c..f1609a22c 100644 --- a/lib/mesa/src/intel/vulkan/anv_nir_lower_ubo_loads.c +++ b/lib/mesa/src/intel/vulkan/anv_nir_lower_ubo_loads.c @@ -47,7 +47,7 @@ lower_ubo_load_instr(nir_builder *b, nir_instr *instr, UNUSED void *_data) unsigned byte_size = bit_size / 8; nir_ssa_def *val; - if (nir_src_is_const(load->src[1])) { + if (!nir_src_is_divergent(load->src[0]) && nir_src_is_const(load->src[1])) { uint32_t offset = nir_src_as_uint(load->src[1]); /* Things should be component-aligned. */ diff --git a/lib/mesa/src/intel/vulkan/anv_perf.c b/lib/mesa/src/intel/vulkan/anv_perf.c index 49cbef52a..3b23067ab 100644 --- a/lib/mesa/src/intel/vulkan/anv_perf.c +++ b/lib/mesa/src/intel/vulkan/anv_perf.c @@ -109,7 +109,10 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id) properties[p++] = metric_id; properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT; - properties[p++] = I915_OA_FORMAT_A32u40_A4u32_B8_C8; + properties[p++] = + device->info->verx10 >= 125 ? + I915_OA_FORMAT_A24u40_A14u32_B8_C8 : + I915_OA_FORMAT_A32u40_A4u32_B8_C8; properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT; properties[p++] = 31; /* slowest sampling period */ @@ -363,7 +366,10 @@ VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR, &out_desc, desc) { desc->flags = 0; /* None so far. */ - snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name); + snprintf(desc->name, sizeof(desc->name), "%s", + INTEL_DEBUG(DEBUG_PERF_SYMBOL_NAMES) ? + intel_counter->symbol_name : + intel_counter->name); snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category); snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc); } @@ -430,10 +436,12 @@ anv_perf_write_pass_results(struct intel_perf_config *perf, const struct intel_perf_query_result *accumulated_results, union VkPerformanceCounterResultKHR *results) { + const struct intel_perf_query_info *query = pool->pass_query[pass]; + for (uint32_t c = 0; c < pool->n_counters; c++) { const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c]; - if (counter_pass->pass != pass) + if (counter_pass->query != query) continue; switch (pool->pass_query[pass]->kind) { diff --git a/lib/mesa/src/intel/vulkan/anv_utrace.c b/lib/mesa/src/intel/vulkan/anv_utrace.c index 3a35aefe4..99dfc50d4 100644 --- a/lib/mesa/src/intel/vulkan/anv_utrace.c +++ b/lib/mesa/src/intel/vulkan/anv_utrace.c @@ -23,15 +23,19 @@ #include "anv_private.h" +#include "ds/intel_tracepoints.h" +#include "genxml/gen8_pack.h" #include "perf/intel_perf.h" +#include "vulkan/runtime/vk_common_entrypoints.h" + static uint32_t command_buffers_count_utraces(struct anv_device *device, uint32_t cmd_buffer_count, struct anv_cmd_buffer **cmd_buffers, uint32_t *utrace_copies) { - if (!u_trace_context_actively_tracing(&device->ds.trace_context)) + if (!u_trace_should_process(&device->ds.trace_context)) return 0; uint32_t utraces = 0; @@ -47,25 +51,25 @@ command_buffers_count_utraces(struct anv_device *device, } static void -anv_utrace_delete_flush_data(struct u_trace_context *utctx, - void *flush_data) +anv_utrace_delete_submit(struct u_trace_context *utctx, void *submit_data) { struct anv_device *device = container_of(utctx, struct anv_device, ds.trace_context); - struct anv_utrace_flush_copy *flush = flush_data; + struct anv_utrace_submit *submit = 
submit_data; + + intel_ds_flush_data_fini(&submit->ds); - intel_ds_flush_data_fini(&flush->ds); + if (submit->trace_bo) + anv_bo_pool_free(&device->utrace_bo_pool, submit->trace_bo); - if (flush->trace_bo) { - assert(flush->batch_bo); - anv_reloc_list_finish(&flush->relocs, &device->vk.alloc); - anv_device_release_bo(device, flush->batch_bo); - anv_device_release_bo(device, flush->trace_bo); + if (submit->batch_bo) { + anv_reloc_list_finish(&submit->relocs, &device->vk.alloc); + anv_bo_pool_free(&device->utrace_bo_pool, submit->batch_bo); } - vk_sync_destroy(&device->vk, flush->sync); + vk_sync_destroy(&device->vk, submit->sync); - vk_free(&device->vk.alloc, flush); + vk_free(&device->vk.alloc, submit); } static void @@ -77,13 +81,13 @@ anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx, { struct anv_device *device = container_of(utctx, struct anv_device, ds.trace_context); - struct anv_utrace_flush_copy *flush = cmdstream; + struct anv_utrace_submit *submit = cmdstream; struct anv_address from_addr = (struct anv_address) { .bo = ts_from, .offset = from_offset * sizeof(uint64_t) }; struct anv_address to_addr = (struct anv_address) { .bo = ts_to, .offset = to_offset * sizeof(uint64_t) }; - anv_genX(device->info, emit_so_memcpy)(&flush->memcpy_state, + anv_genX(device->info, emit_so_memcpy)(&submit->memcpy_state, to_addr, from_addr, count * sizeof(uint64_t)); } @@ -91,7 +95,7 @@ VkResult anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, uint32_t cmd_buffer_count, struct anv_cmd_buffer **cmd_buffers, - struct anv_utrace_flush_copy **out_flush_data) + struct anv_utrace_submit **out_submit) { struct anv_device *device = queue->device; uint32_t utrace_copies = 0; @@ -100,94 +104,105 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, cmd_buffers, &utrace_copies); if (!utraces) { - *out_flush_data = NULL; + *out_submit = NULL; return VK_SUCCESS; } VkResult result; - struct anv_utrace_flush_copy *flush = - vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_flush_copy), + struct anv_utrace_submit *submit = + vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_submit), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (!flush) + if (!submit) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - intel_ds_flush_data_init(&flush->ds, &queue->ds, queue->ds.submission_id); + intel_ds_flush_data_init(&submit->ds, &queue->ds, queue->ds.submission_id); result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type, - 0, 0, &flush->sync); + 0, 0, &submit->sync); if (result != VK_SUCCESS) goto error_sync; if (utrace_copies > 0) { result = anv_bo_pool_alloc(&device->utrace_bo_pool, utrace_copies * 4096, - &flush->trace_bo); + &submit->trace_bo); if (result != VK_SUCCESS) goto error_trace_buf; + uint32_t batch_size = 512; /* 128 dwords of setup */ + if (device->info->verx10 == 120 || intel_device_info_is_dg2(device->info)) { + /* Enable/Disable preemption at the begin/end */ + batch_size += 2 * (250 /* 250 MI_NOOPs*/ + + 6 /* PIPE_CONTROL */ + + 3 /* MI_LRI */) * 4 /* dwords */; + } + batch_size += 256 * utrace_copies; /* 64 dwords per copy */ + batch_size = align(batch_size + 4, 8); /* MI_BATCH_BUFFER_END */ + result = anv_bo_pool_alloc(&device->utrace_bo_pool, - /* 128 dwords of setup + 64 dwords per copy */ - align_u32(512 + 64 * utrace_copies, 4096), - &flush->batch_bo); + align(batch_size, 4096), + &submit->batch_bo); if (result != VK_SUCCESS) goto error_batch_buf; - result = anv_reloc_list_init(&flush->relocs, &device->vk.alloc); + result = 
anv_reloc_list_init(&submit->relocs, &device->vk.alloc); if (result != VK_SUCCESS) goto error_reloc_list; - flush->batch.alloc = &device->vk.alloc; - flush->batch.relocs = &flush->relocs; - anv_batch_set_storage(&flush->batch, - (struct anv_address) { .bo = flush->batch_bo, }, - flush->batch_bo->map, flush->batch_bo->size); + submit->batch.alloc = &device->vk.alloc; + submit->batch.relocs = &submit->relocs; + anv_batch_set_storage(&submit->batch, + (struct anv_address) { .bo = submit->batch_bo, }, + submit->batch_bo->map, submit->batch_bo->size); /* Emit the copies */ - anv_genX(device->info, emit_so_memcpy_init)(&flush->memcpy_state, - device, - &flush->batch); + anv_genX(device->info, emit_so_memcpy_init)(&submit->memcpy_state, + device, + &submit->batch); for (uint32_t i = 0; i < cmd_buffer_count; i++) { if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) { - u_trace_flush(&cmd_buffers[i]->trace, flush, false); + u_trace_flush(&cmd_buffers[i]->trace, submit, false); } else { u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace), u_trace_end_iterator(&cmd_buffers[i]->trace), - &flush->ds.trace, - flush, + &submit->ds.trace, + submit, anv_device_utrace_emit_copy_ts_buffer); } } - anv_genX(device->info, emit_so_memcpy_fini)(&flush->memcpy_state); + anv_genX(device->info, emit_so_memcpy_fini)(&submit->memcpy_state); + anv_genX(device->info, emit_so_memcpy_end)(&submit->memcpy_state); - u_trace_flush(&flush->ds.trace, flush, true); + u_trace_flush(&submit->ds.trace, submit, true); - if (flush->batch.status != VK_SUCCESS) { - result = flush->batch.status; + if (submit->batch.status != VK_SUCCESS) { + result = submit->batch.status; goto error_batch; } } else { for (uint32_t i = 0; i < cmd_buffer_count; i++) { assert(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); - u_trace_flush(&cmd_buffers[i]->trace, flush, i == (cmd_buffer_count - 1)); + u_trace_flush(&cmd_buffers[i]->trace, submit, i == (cmd_buffer_count - 1)); } } - flush->queue = queue; + submit->queue = queue; - *out_flush_data = flush; + *out_submit = submit; return VK_SUCCESS; error_batch: - anv_reloc_list_finish(&flush->relocs, &device->vk.alloc); + anv_reloc_list_finish(&submit->relocs, &device->vk.alloc); error_reloc_list: - anv_bo_pool_free(&device->utrace_bo_pool, flush->batch_bo); + anv_bo_pool_free(&device->utrace_bo_pool, submit->batch_bo); error_batch_buf: - anv_bo_pool_free(&device->utrace_bo_pool, flush->trace_bo); + anv_bo_pool_free(&device->utrace_bo_pool, submit->trace_bo); error_trace_buf: - vk_sync_destroy(&device->vk, flush->sync); + vk_sync_destroy(&device->vk, submit->sync); error_sync: - vk_free(&device->vk.alloc, flush); + intel_ds_flush_data_fini(&submit->ds); + vk_free(&device->vk.alloc, submit); return result; } @@ -200,7 +215,7 @@ anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b) struct anv_bo *bo = NULL; UNUSED VkResult result = anv_bo_pool_alloc(&device->utrace_bo_pool, - align_u32(size_b, 4096), + align(size_b, 4096), &bo); assert(result == VK_SUCCESS); @@ -222,15 +237,17 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps, unsigned idx, bool end_of_pipe) { - struct anv_cmd_buffer *cmd_buffer = - container_of(ut, struct anv_cmd_buffer, trace); - struct anv_device *device = cmd_buffer->device; + struct anv_device *device = + container_of(ut->utctx, struct anv_device, ds.trace_context); + struct anv_batch *batch = + cs != NULL ? 
cs : + &container_of(ut, struct anv_cmd_buffer, trace)->batch; struct anv_bo *bo = timestamps; enum anv_timestamp_capture_type capture_type = (end_of_pipe) ? ANV_TIMESTAMP_CAPTURE_END_OF_PIPE : ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE; - device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device, + device->physical->cmd_emit_timestamp(batch, device, (struct anv_address) { .bo = bo, .offset = idx * sizeof(uint64_t) }, @@ -244,13 +261,13 @@ anv_utrace_read_ts(struct u_trace_context *utctx, struct anv_device *device = container_of(utctx, struct anv_device, ds.trace_context); struct anv_bo *bo = timestamps; - struct anv_utrace_flush_copy *flush = flush_data; + struct anv_utrace_submit *submit = flush_data; /* Only need to stall on results for the first entry: */ if (idx == 0) { UNUSED VkResult result = vk_sync_wait(&device->vk, - flush->sync, + submit->sync, 0, VK_SYNC_WAIT_COMPLETE, os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE)); @@ -271,7 +288,7 @@ anv_device_utrace_init(struct anv_device *device) { anv_bo_pool_init(&device->utrace_bo_pool, device, "utrace"); intel_ds_device_init(&device->ds, device->info, device->fd, - device->physical->local_minor - 128, + device->physical->local_minor, INTEL_DS_API_VULKAN); u_trace_context_init(&device->ds.trace_context, &device->ds, @@ -279,14 +296,14 @@ anv_device_utrace_init(struct anv_device *device) anv_utrace_destroy_ts_buffer, anv_utrace_record_ts, anv_utrace_read_ts, - anv_utrace_delete_flush_data); + anv_utrace_delete_submit); for (uint32_t q = 0; q < device->queue_count; q++) { struct anv_queue *queue = &device->queues[q]; intel_ds_device_init_queue(&device->ds, &queue->ds, "%s%u", - intel_engines_class_to_string(queue->family->engine_class), - queue->index_in_family); + intel_engines_class_to_string(queue->family->engine_class), + queue->vk.index_in_family); } } @@ -319,6 +336,8 @@ anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits) { .anv = ANV_PIPE_HDC_PIPELINE_FLUSH_BIT, .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, }, { .anv = ANV_PIPE_STALL_AT_SCOREBOARD_BIT, .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, }, { .anv = ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, .ds = INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, }, + { .anv = ANV_PIPE_PSS_STALL_SYNC_BIT, .ds = INTEL_DS_PSS_STALL_SYNC_BIT, }, + { .anv = ANV_PIPE_END_OF_PIPE_SYNC_BIT, .ds = INTEL_DS_END_OF_PIPE_BIT, }, }; enum intel_ds_stall_flag ret = 0; @@ -329,3 +348,140 @@ anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits) return ret; } + +void anv_CmdBeginDebugUtilsLabelEXT( + VkCommandBuffer _commandBuffer, + const VkDebugUtilsLabelEXT *pLabelInfo) +{ + VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _commandBuffer); + + vk_common_CmdBeginDebugUtilsLabelEXT(_commandBuffer, pLabelInfo); + + trace_intel_begin_cmd_buffer_annotation(&cmd_buffer->trace); +} + +void anv_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer) +{ + VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _commandBuffer); + + if (cmd_buffer->vk.labels.size > 0) { + const VkDebugUtilsLabelEXT *label = + util_dynarray_top_ptr(&cmd_buffer->vk.labels, VkDebugUtilsLabelEXT); + + trace_intel_end_cmd_buffer_annotation(&cmd_buffer->trace, + strlen(label->pLabelName), + label->pLabelName); + } + + vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer); +} + +void +anv_queue_trace(struct anv_queue *queue, const char *label, bool frame, bool begin) +{ + struct anv_device *device = queue->device; + + VkResult result; + struct anv_utrace_submit *submit = + vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_submit), + 8, 
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!submit) + return; + + submit->queue = queue; + + intel_ds_flush_data_init(&submit->ds, &queue->ds, queue->ds.submission_id); + + result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type, + 0, 0, &submit->sync); + if (result != VK_SUCCESS) + goto error_trace; + + result = anv_bo_pool_alloc(&device->utrace_bo_pool, 4096, + &submit->batch_bo); + if (result != VK_SUCCESS) + goto error_sync; + + result = anv_reloc_list_init(&submit->relocs, &device->vk.alloc); + if (result != VK_SUCCESS) + goto error_batch_bo; + + submit->batch.alloc = &device->vk.alloc; + submit->batch.relocs = &submit->relocs; + anv_batch_set_storage(&submit->batch, + (struct anv_address) { .bo = submit->batch_bo, }, + submit->batch_bo->map, submit->batch_bo->size); + + if (frame) { + if (begin) + trace_intel_begin_frame(&submit->ds.trace, &submit->batch); + else + trace_intel_end_frame(&submit->ds.trace, &submit->batch, + device->debug_frame_desc->frame_id); + } else { + if (begin) { + trace_intel_begin_queue_annotation(&submit->ds.trace, &submit->batch); + } else { + trace_intel_end_queue_annotation(&submit->ds.trace, + &submit->batch, + strlen(label), + label); + } + } + + anv_batch_emit(&submit->batch, GFX8_MI_BATCH_BUFFER_END, bbs); + anv_batch_emit(&submit->batch, GFX8_MI_NOOP, noop); + + if (submit->batch.status != VK_SUCCESS) { + result = submit->batch.status; + goto error_reloc_list; + } + + u_trace_flush(&submit->ds.trace, submit, true); + + pthread_mutex_lock(&device->mutex); + device->kmd_backend->queue_exec_trace(queue, submit); + pthread_mutex_unlock(&device->mutex); + + return; + + error_reloc_list: + anv_reloc_list_finish(&submit->relocs, &device->vk.alloc); + error_batch_bo: + anv_bo_pool_free(&device->utrace_bo_pool, submit->batch_bo); + error_sync: + vk_sync_destroy(&device->vk, submit->sync); + error_trace: + intel_ds_flush_data_fini(&submit->ds); + vk_free(&device->vk.alloc, submit); +} + +void +anv_QueueBeginDebugUtilsLabelEXT( + VkQueue _queue, + const VkDebugUtilsLabelEXT *pLabelInfo) +{ + VK_FROM_HANDLE(anv_queue, queue, _queue); + + vk_common_QueueBeginDebugUtilsLabelEXT(_queue, pLabelInfo); + + anv_queue_trace(queue, pLabelInfo->pLabelName, + false /* frame */, true /* begin */); +} + +void +anv_QueueEndDebugUtilsLabelEXT(VkQueue _queue) +{ + VK_FROM_HANDLE(anv_queue, queue, _queue); + + if (queue->vk.labels.size > 0) { + const VkDebugUtilsLabelEXT *label = + util_dynarray_top_ptr(&queue->vk.labels, VkDebugUtilsLabelEXT); + anv_queue_trace(queue, label->pLabelName, + false /* frame */, false /* begin */); + + u_trace_context_process(&queue->device->ds.trace_context, true); + } + + vk_common_QueueEndDebugUtilsLabelEXT(_queue); +} diff --git a/lib/mesa/src/intel/vulkan/anv_video.c b/lib/mesa/src/intel/vulkan/anv_video.c new file mode 100644 index 000000000..38a3b09b2 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/anv_video.c @@ -0,0 +1,267 @@ +/* + * Copyright © 2021 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in 
all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +#include "vk_video/vulkan_video_codecs_common.h" + +VkResult +anv_CreateVideoSessionKHR(VkDevice _device, + const VkVideoSessionCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkVideoSessionKHR *pVideoSession) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + struct anv_video_session *vid = + vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*vid), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!vid) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + memset(vid, 0, sizeof(struct anv_video_session)); + + VkResult result = vk_video_session_init(&device->vk, + &vid->vk, + pCreateInfo); + if (result != VK_SUCCESS) { + vk_free2(&device->vk.alloc, pAllocator, vid); + return result; + } + + *pVideoSession = anv_video_session_to_handle(vid); + return VK_SUCCESS; +} + +void +anv_DestroyVideoSessionKHR(VkDevice _device, + VkVideoSessionKHR _session, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_video_session, vid, _session); + if (!_session) + return; + + vk_object_base_finish(&vid->vk.base); + vk_free2(&device->vk.alloc, pAllocator, vid); +} + +VkResult +anv_CreateVideoSessionParametersKHR(VkDevice _device, + const VkVideoSessionParametersCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkVideoSessionParametersKHR *pVideoSessionParameters) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_video_session, vid, pCreateInfo->videoSession); + ANV_FROM_HANDLE(anv_video_session_params, templ, pCreateInfo->videoSessionParametersTemplate); + struct anv_video_session_params *params = + vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*params), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!params) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + VkResult result = vk_video_session_parameters_init(&device->vk, + ¶ms->vk, + &vid->vk, + templ ? 
&templ->vk : NULL, + pCreateInfo); + if (result != VK_SUCCESS) { + vk_free2(&device->vk.alloc, pAllocator, params); + return result; + } + + *pVideoSessionParameters = anv_video_session_params_to_handle(params); + return VK_SUCCESS; +} + +void +anv_DestroyVideoSessionParametersKHR(VkDevice _device, + VkVideoSessionParametersKHR _params, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_video_session_params, params, _params); + if (!_params) + return; + vk_video_session_parameters_finish(&device->vk, ¶ms->vk); + vk_free2(&device->vk.alloc, pAllocator, params); +} + +VkResult +anv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, + const VkVideoProfileInfoKHR *pVideoProfile, + VkVideoCapabilitiesKHR *pCapabilities) +{ + pCapabilities->minBitstreamBufferOffsetAlignment = 32; + pCapabilities->minBitstreamBufferSizeAlignment = 32; + pCapabilities->pictureAccessGranularity.width = ANV_MB_WIDTH; + pCapabilities->pictureAccessGranularity.height = ANV_MB_HEIGHT; + pCapabilities->minCodedExtent.width = ANV_MB_WIDTH; + pCapabilities->minCodedExtent.height = ANV_MB_HEIGHT; + pCapabilities->maxCodedExtent.width = 4096; + pCapabilities->maxCodedExtent.height = 4096; + pCapabilities->flags = VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR; + + struct VkVideoDecodeCapabilitiesKHR *dec_caps = (struct VkVideoDecodeCapabilitiesKHR *) + vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_CAPABILITIES_KHR); + if (dec_caps) + dec_caps->flags = VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR; + + switch (pVideoProfile->videoCodecOperation) { + case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: { + struct VkVideoDecodeH264CapabilitiesKHR *ext = (struct VkVideoDecodeH264CapabilitiesKHR *) + vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_H264_CAPABILITIES_KHR); + pCapabilities->maxDpbSlots = 17; + pCapabilities->maxActiveReferencePictures = 16; + + ext->fieldOffsetGranularity.x = 0; + ext->fieldOffsetGranularity.y = 0; + ext->maxLevelIdc = 51; + strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME); + pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION; + break; + } + default: + break; + } + return VK_SUCCESS; +} + +VkResult +anv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceVideoFormatInfoKHR *pVideoFormatInfo, + uint32_t *pVideoFormatPropertyCount, + VkVideoFormatPropertiesKHR *pVideoFormatProperties) +{ + *pVideoFormatPropertyCount = 1; + + if (!pVideoFormatProperties) + return VK_SUCCESS; + + pVideoFormatProperties[0].format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; + pVideoFormatProperties[0].imageType = VK_IMAGE_TYPE_2D; + pVideoFormatProperties[0].imageTiling = VK_IMAGE_TILING_OPTIMAL; + pVideoFormatProperties[0].imageUsageFlags = pVideoFormatInfo->imageUsage; + return VK_SUCCESS; +} + +static void +get_h264_video_session_mem_reqs(struct anv_video_session *vid, + VkVideoSessionMemoryRequirementsKHR *mem_reqs, + uint32_t memory_types) +{ + uint32_t width_in_mb = align(vid->vk.max_coded.width, ANV_MB_WIDTH) / ANV_MB_WIDTH; + /* intra row store is width in macroblocks * 64 */ + mem_reqs[0].memoryBindIndex = ANV_VID_MEM_H264_INTRA_ROW_STORE; + mem_reqs[0].memoryRequirements.size = width_in_mb * 64; + mem_reqs[0].memoryRequirements.alignment = 4096; + mem_reqs[0].memoryRequirements.memoryTypeBits = memory_types; + + /* deblocking filter row store is width in 
macroblocks * 64 * 4*/ + mem_reqs[1].memoryBindIndex = ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE; + mem_reqs[1].memoryRequirements.size = width_in_mb * 64 * 4; + mem_reqs[1].memoryRequirements.alignment = 4096; + mem_reqs[1].memoryRequirements.memoryTypeBits = memory_types; + + /* bsd mpc row scratch is width in macroblocks * 64 * 2 */ + mem_reqs[2].memoryBindIndex = ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH; + mem_reqs[2].memoryRequirements.size = width_in_mb * 64 * 2; + mem_reqs[2].memoryRequirements.alignment = 4096; + mem_reqs[2].memoryRequirements.memoryTypeBits = memory_types; + + /* mpr row scratch is width in macroblocks * 64 * 2 */ + mem_reqs[3].memoryBindIndex = ANV_VID_MEM_H264_MPR_ROW_SCRATCH; + mem_reqs[3].memoryRequirements.size = width_in_mb * 64 * 2; + mem_reqs[3].memoryRequirements.alignment = 4096; + mem_reqs[3].memoryRequirements.memoryTypeBits = memory_types; +} + +VkResult +anv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, + VkVideoSessionKHR videoSession, + uint32_t *pVideoSessionMemoryRequirementsCount, + VkVideoSessionMemoryRequirementsKHR *mem_reqs) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_video_session, vid, videoSession); + + switch (vid->vk.op) { + case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: + *pVideoSessionMemoryRequirementsCount = ANV_VIDEO_MEM_REQS_H264; + break; + default: + unreachable("unknown codec"); + } + if (!mem_reqs) + return VK_SUCCESS; + + uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1; + switch (vid->vk.op) { + case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: + get_h264_video_session_mem_reqs(vid, mem_reqs, memory_types); + break; + default: + unreachable("unknown codec"); + } + + return VK_SUCCESS; +} + +VkResult +anv_UpdateVideoSessionParametersKHR(VkDevice _device, + VkVideoSessionParametersKHR _params, + const VkVideoSessionParametersUpdateInfoKHR *pUpdateInfo) +{ + ANV_FROM_HANDLE(anv_video_session_params, params, _params); + return vk_video_session_parameters_update(¶ms->vk, pUpdateInfo); +} + +static void +copy_bind(struct anv_vid_mem *dst, + const VkBindVideoSessionMemoryInfoKHR *src) +{ + dst->mem = anv_device_memory_from_handle(src->memory); + dst->offset = src->memoryOffset; + dst->size = src->memorySize; +} + +VkResult +anv_BindVideoSessionMemoryKHR(VkDevice _device, + VkVideoSessionKHR videoSession, + uint32_t bind_mem_count, + const VkBindVideoSessionMemoryInfoKHR *bind_mem) +{ + ANV_FROM_HANDLE(anv_video_session, vid, videoSession); + + assert(bind_mem_count == 4); + switch (vid->vk.op) { + case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: + for (unsigned i = 0; i < bind_mem_count; i++) { + copy_bind(&vid->vid_mem[bind_mem[i].memoryBindIndex], &bind_mem[i]); + } + break; + default: + unreachable("unknown codec"); + } + return VK_SUCCESS; +} diff --git a/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c b/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c index 3958452f0..4c675e985 100644 --- a/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c +++ b/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c @@ -31,6 +31,7 @@ #include "genxml/gen_macros.h" #include "genxml/genX_pack.h" +#include "genxml/genX_rt_pack.h" #if GFX_VERx10 >= 125 @@ -167,7 +168,7 @@ get_gpu_size_estimate(const VkAccelerationStructureBuildGeometryInfoKHR *pInfo, struct MKSizeEstimate est = {}; uint64_t size = sizeof(BVHBase); - size = align_u64(size, 64); + size = align64(size, 64); /* Must immediately follow BVHBase because we use fixed offset to nodes. 
*/ est.node_data_start = size; @@ -258,25 +259,25 @@ get_gpu_size_estimate(const VkAccelerationStructureBuildGeometryInfoKHR *pInfo, unreachable("Unsupported acceleration structure type"); } - size = align_u64(size, 64); + size = align64(size, 64); est.instance_descs_start = size; size += sizeof(struct InstanceDesc) * num_instances; est.geo_meta_data_start = size; size += sizeof(struct GeoMetaData) * pInfo->geometryCount; - size = align_u64(size, 64); + size = align64(size, 64); - assert(size == align_u64(size, 64)); + assert(size == align64(size, 64)); est.back_pointer_start = size; const bool alloc_backpointers = false; /* RT TODO */ if (alloc_backpointers) { size += est.max_inner_nodes * sizeof(uint32_t); - size = align_u64(size, 64); + size = align64(size, 64); } assert(size < UINT32_MAX); - est.sizeTotal = align_u64(size, 64); + est.sizeTotal = align64(size, 64); return est; } @@ -392,62 +393,6 @@ genX(GetAccelerationStructureBuildSizesKHR)( pSizeInfo->updateScratchSize = gpu_size_info.updateScratchSize; } -VkResult -genX(CreateAccelerationStructureKHR)( - VkDevice _device, - const VkAccelerationStructureCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkAccelerationStructureKHR* pAccelerationStructure) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); - struct anv_acceleration_structure *accel; - - accel = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*accel), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (accel == NULL) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - vk_object_base_init(&device->vk, &accel->base, - VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR); - - accel->size = pCreateInfo->size; - accel->address = anv_address_add(buffer->address, pCreateInfo->offset); - - *pAccelerationStructure = anv_acceleration_structure_to_handle(accel); - - return VK_SUCCESS; -} - -void -genX(DestroyAccelerationStructureKHR)( - VkDevice _device, - VkAccelerationStructureKHR accelerationStructure, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_acceleration_structure, accel, accelerationStructure); - - if (!accel) - return; - - vk_object_base_finish(&accel->base); - vk_free2(&device->vk.alloc, pAllocator, accel); -} - -VkDeviceAddress -genX(GetAccelerationStructureDeviceAddressKHR)( - VkDevice device, - const VkAccelerationStructureDeviceAddressInfoKHR* pInfo) -{ - ANV_FROM_HANDLE(anv_acceleration_structure, accel, - pInfo->accelerationStructure); - - assert(!anv_address_is_null(accel->address)); - - return anv_address_physical(accel->address); -} - void genX(GetDeviceAccelerationStructureCompatibilityKHR)( VkDevice _device, @@ -703,12 +648,12 @@ cmd_build_acceleration_structures( const uint32_t *pMaxPrimitiveCounts = ppMaxPrimitiveCounts ? 
ppMaxPrimitiveCounts[i] : NULL; - ANV_FROM_HANDLE(anv_acceleration_structure, dst_accel, + ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel, pInfo->dstAccelerationStructure); bs->build_method = device->bvh_build_method; - bs->bvh_addr = dst_accel->address; + bs->bvh_addr = anv_address_from_u64(vk_acceleration_structure_get_va(dst_accel)); bs->estimate = get_gpu_size_estimate(pInfo, pBuildRangeInfos, pMaxPrimitiveCounts); @@ -872,6 +817,17 @@ cmd_build_acceleration_structures( &data, sizeof(data)); } + if (anv_cmd_buffer_is_render_queue(cmd_buffer)) + genX(flush_pipeline_select_gpgpu)(cmd_buffer); + + /* Due to the nature of GRL and its heavy use of jumps/predication, we + * cannot tell exactly in what order the CFE_STATE we insert are going to + * be executed. So always use the largest possible size. + */ + genX(cmd_buffer_ensure_cfe_state)( + cmd_buffer, + cmd_buffer->device->physical->max_grl_scratch_size); + /* Round 1 : init_globals kernel */ genX(grl_misc_batched_init_globals)( cmd_buffer, @@ -1162,24 +1118,26 @@ genX(CmdCopyAccelerationStructureKHR)( const VkCopyAccelerationStructureInfoKHR* pInfo) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_acceleration_structure, src_accel, pInfo->src); - ANV_FROM_HANDLE(anv_acceleration_structure, dst_accel, pInfo->dst); + ANV_FROM_HANDLE(vk_acceleration_structure, src_accel, pInfo->src); + ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel, pInfo->dst); assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR || pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR); if (pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR) { - struct anv_address src_size_addr = anv_address_add( - src_accel->address, - offsetof(struct BVHBase, Meta.allocationSize)); - genX(grl_copy_clone_indirect)(cmd_buffer, - anv_address_physical(dst_accel->address), - anv_address_physical(src_accel->address), - anv_address_physical(src_size_addr)); + uint64_t src_size_addr = + vk_acceleration_structure_get_va(src_accel) + + offsetof(struct BVHBase, Meta.allocationSize); + genX(grl_copy_clone_indirect)( + cmd_buffer, + vk_acceleration_structure_get_va(dst_accel), + vk_acceleration_structure_get_va(src_accel), + src_size_addr); } else { - genX(grl_copy_compact)(cmd_buffer, - anv_address_physical(dst_accel->address), - anv_address_physical(src_accel->address)); + genX(grl_copy_compact)( + cmd_buffer, + vk_acceleration_structure_get_va(dst_accel), + vk_acceleration_structure_get_va(src_accel)); } cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_END_OF_PIPE_SYNC_BIT; @@ -1191,19 +1149,20 @@ genX(CmdCopyAccelerationStructureToMemoryKHR)( const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_acceleration_structure, src_accel, pInfo->src); + ANV_FROM_HANDLE(vk_acceleration_structure, src_accel, pInfo->src); struct anv_device *device = cmd_buffer->device; - struct anv_address src_size_addr = anv_address_add( - src_accel->address, - offsetof(struct BVHBase, Meta.allocationSize)); + uint64_t src_size_addr = + vk_acceleration_structure_get_va(src_accel) + + offsetof(struct BVHBase, Meta.allocationSize); assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR); - genX(grl_copy_serialize_indirect)(cmd_buffer, - pInfo->dst.deviceAddress, - anv_address_physical(src_accel->address), - anv_address_physical(device->rt_uuid_addr), - anv_address_physical(src_size_addr)); + genX(grl_copy_serialize_indirect)( + cmd_buffer, + 
pInfo->dst.deviceAddress, + vk_acceleration_structure_get_va(src_accel), + anv_address_physical(device->rt_uuid_addr), + src_size_addr); cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_END_OF_PIPE_SYNC_BIT; } @@ -1214,16 +1173,17 @@ genX(CmdCopyMemoryToAccelerationStructureKHR)( const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_acceleration_structure, dst_accel, pInfo->dst); + ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel, pInfo->dst); assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR); uint64_t src_size_addr = pInfo->src.deviceAddress + offsetof(struct SerializationHeader, DeserializedSizeInBytes); - genX(grl_copy_deserialize_indirect)(cmd_buffer, - anv_address_physical(dst_accel->address), - pInfo->src.deviceAddress, - src_size_addr); + genX(grl_copy_deserialize_indirect)( + cmd_buffer, + vk_acceleration_structure_get_va(dst_accel), + pInfo->src.deviceAddress, + src_size_addr); cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_END_OF_PIPE_SYNC_BIT; } diff --git a/lib/mesa/src/intel/vulkan/genX_cmd_draw_generated_indirect.h b/lib/mesa/src/intel/vulkan/genX_cmd_draw_generated_indirect.h new file mode 100644 index 000000000..ccb1bd7a2 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/genX_cmd_draw_generated_indirect.h @@ -0,0 +1,750 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef GENX_CMD_GENERATED_INDIRECT_DRAW_H +#define GENX_CMD_GENERATED_INDIRECT_DRAW_H + +#include <assert.h> +#include <stdbool.h> + +#include "util/macros.h" + +#include "common/intel_genX_state.h" + +#include "anv_private.h" +#include "anv_generated_indirect_draws.h" + +/* This is a maximum number of items a fragment shader can generate due to the + * viewport size. 
+ */ +#define MAX_GENERATED_DRAW_COUNT (8192 * 8192) + +static void +genX(cmd_buffer_emit_generate_draws_pipeline)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch *batch = &cmd_buffer->generation_batch; + struct anv_device *device = cmd_buffer->device; + const struct anv_shader_bin *draw_kernel = device->generated_draw_kernel; + const struct brw_wm_prog_data *prog_data = + brw_wm_prog_data_const(draw_kernel->prog_data); + + uint32_t *dw = anv_batch_emitn(batch, + 1 + 2 * GENX(VERTEX_ELEMENT_STATE_length), + GENX(3DSTATE_VERTEX_ELEMENTS)); + /* You might think there is some shady stuff going here and you would be + * right. We're setting up 2 VERTEX_ELEMENT_STATE yet we're only providing + * 1 (positions) VERTEX_BUFFER_STATE later. + * + * Find more about how to set up a 3D pipeline with a fragment shader but + * without a vertex shader in blorp_emit_vertex_elements() in + * blorp_genX_exec.h. + */ + GENX(VERTEX_ELEMENT_STATE_pack)( + batch, dw + 1, &(struct GENX(VERTEX_ELEMENT_STATE)) { + .VertexBufferIndex = 1, + .Valid = true, + .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT, + .SourceElementOffset = 0, + .Component0Control = VFCOMP_STORE_SRC, + .Component1Control = VFCOMP_STORE_0, + .Component2Control = VFCOMP_STORE_0, + .Component3Control = VFCOMP_STORE_0, + }); + GENX(VERTEX_ELEMENT_STATE_pack)( + batch, dw + 3, &(struct GENX(VERTEX_ELEMENT_STATE)) { + .VertexBufferIndex = 0, + .Valid = true, + .SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT, + .SourceElementOffset = 0, + .Component0Control = VFCOMP_STORE_SRC, + .Component1Control = VFCOMP_STORE_SRC, + .Component2Control = VFCOMP_STORE_SRC, + .Component3Control = VFCOMP_STORE_1_FP, + }); + + anv_batch_emit(batch, GENX(3DSTATE_VF_STATISTICS), vf); + anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs) { + sgvs.InstanceIDEnable = true; + sgvs.InstanceIDComponentNumber = COMP_1; + sgvs.InstanceIDElementOffset = 0; + } +#if GFX_VER >= 11 + anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS_2), sgvs); +#endif + anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) { + vfi.InstancingEnable = false; + vfi.VertexElementIndex = 0; + } + anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) { + vfi.InstancingEnable = false; + vfi.VertexElementIndex = 1; + } + + anv_batch_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) { + topo.PrimitiveTopologyType = _3DPRIM_RECTLIST; + } + + /* Emit URB setup. We tell it that the VS is active because we want it to + * allocate space for the VS. Even though one isn't run, we need VUEs to + * store the data that VF is going to pass to SOL. 
+ */ + const unsigned entry_size[4] = { DIV_ROUND_UP(32, 64), 1, 1, 1 }; + + genX(emit_l3_config)(batch, device, device->generated_draw_l3_config); + + cmd_buffer->state.current_l3_config = device->generated_draw_l3_config; + + enum intel_urb_deref_block_size deref_block_size; + genX(emit_urb_setup)(device, batch, device->generated_draw_l3_config, + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, + entry_size, &deref_block_size); + + anv_batch_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) { + ps_blend.HasWriteableRT = true; + } + + anv_batch_emit(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wm); + +#if GFX_VER >= 12 + anv_batch_emit(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) { + db.DepthBoundsTestEnable = false; + db.DepthBoundsTestMinValue = 0.0; + db.DepthBoundsTestMaxValue = 1.0; + } +#endif + + anv_batch_emit(batch, GENX(3DSTATE_MULTISAMPLE), ms); + anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_MASK), sm) { + sm.SampleMask = 0x1; + } + + anv_batch_emit(batch, GENX(3DSTATE_VS), vs); + anv_batch_emit(batch, GENX(3DSTATE_HS), hs); + anv_batch_emit(batch, GENX(3DSTATE_TE), te); + anv_batch_emit(batch, GENX(3DSTATE_DS), DS); + +#if GFX_VERx10 >= 125 + if (device->vk.enabled_extensions.NV_mesh_shader || + device->vk.enabled_extensions.EXT_mesh_shader) { + anv_batch_emit(batch, GENX(3DSTATE_MESH_CONTROL), mesh); + anv_batch_emit(batch, GENX(3DSTATE_TASK_CONTROL), task); + } +#endif + + anv_batch_emit(batch, GENX(3DSTATE_STREAMOUT), so); + + anv_batch_emit(batch, GENX(3DSTATE_GS), gs); + + anv_batch_emit(batch, GENX(3DSTATE_CLIP), clip) { + clip.PerspectiveDivideDisable = true; + } + + anv_batch_emit(batch, GENX(3DSTATE_SF), sf) { +#if GFX_VER >= 12 + sf.DerefBlockSize = deref_block_size; +#endif + } + + anv_batch_emit(batch, GENX(3DSTATE_RASTER), raster) { + raster.CullMode = CULLMODE_NONE; + } + + anv_batch_emit(batch, GENX(3DSTATE_SBE), sbe) { + sbe.VertexURBEntryReadOffset = 1; + sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs; + sbe.VertexURBEntryReadLength = MAX2((prog_data->num_varying_inputs + 1) / 2, 1); + sbe.ConstantInterpolationEnable = prog_data->flat_inputs; + sbe.ForceVertexURBEntryReadLength = true; + sbe.ForceVertexURBEntryReadOffset = true; + for (unsigned i = 0; i < 32; i++) + sbe.AttributeActiveComponentFormat[i] = ACF_XYZW; + } + + anv_batch_emit(batch, GENX(3DSTATE_WM), wm); + + anv_batch_emit(batch, GENX(3DSTATE_PS), ps) { + intel_set_ps_dispatch_state(&ps, device->info, prog_data, + 1 /* rasterization_samples */, + 0 /* msaa_flags */); + + ps.VectorMaskEnable = prog_data->uses_vmask; + + ps.BindingTableEntryCount = GFX_VER == 9 ? 
1 : 0; + ps.PushConstantEnable = prog_data->base.nr_params > 0 || + prog_data->base.ubo_ranges[0].length; + + ps.DispatchGRFStartRegisterForConstantSetupData0 = + brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0); + ps.DispatchGRFStartRegisterForConstantSetupData1 = + brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1); + ps.DispatchGRFStartRegisterForConstantSetupData2 = + brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2); + + ps.KernelStartPointer0 = draw_kernel->kernel.offset + + brw_wm_prog_data_prog_offset(prog_data, ps, 0); + ps.KernelStartPointer1 = draw_kernel->kernel.offset + + brw_wm_prog_data_prog_offset(prog_data, ps, 1); + ps.KernelStartPointer2 = draw_kernel->kernel.offset + + brw_wm_prog_data_prog_offset(prog_data, ps, 2); + + ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1; + } + + anv_batch_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) { + psx.PixelShaderValid = true; + psx.AttributeEnable = prog_data->num_varying_inputs > 0; + psx.PixelShaderIsPerSample = prog_data->persample_dispatch; + psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode; + psx.PixelShaderComputesStencil = prog_data->computed_stencil; + } + + anv_batch_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 4 * GENX(CC_VIEWPORT_length), 32); + struct GENX(CC_VIEWPORT) cc_viewport = { + .MinimumDepth = 0.0f, + .MaximumDepth = 1.0f, + }; + GENX(CC_VIEWPORT_pack)(NULL, cc_state.map, &cc_viewport); + cc.CCViewportPointer = cc_state.offset; + } + +#if GFX_VER >= 12 + /* Disable Primitive Replication. */ + anv_batch_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr); +#endif + + anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc); + anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_HS), alloc); + anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_DS), alloc); + anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS), alloc); + anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), alloc) { + alloc.ConstantBufferOffset = 0; + alloc.ConstantBufferSize = cmd_buffer->device->info->max_constant_urb_size_kb; + } + +#if GFX_VERx10 == 125 + /* DG2: Wa_22011440098 + * MTL: Wa_18022330953 + * + * In 3D mode, after programming push constant alloc command immediately + * program push constant command(ZERO length) without any commit between + * them. + * + * Note that Wa_16011448509 isn't needed here as all address bits are zero. + */ + anv_batch_emit(batch, GENX(3DSTATE_CONSTANT_ALL), c) { + /* Update empty push constants for all stages (bitmask = 11111b) */ + c.ShaderUpdateEnable = 0x1f; + c.MOCS = anv_mocs(cmd_buffer->device, NULL, 0); + } +#endif + +#if GFX_VER == 9 + /* Allocate a binding table for Gfx9 for 2 reason : + * + * 1. we need a to emit a 3DSTATE_BINDING_TABLE_POINTERS_PS to make the + * HW apply the preceeding 3DSTATE_CONSTANT_PS + * + * 2. Emitting an empty 3DSTATE_BINDING_TABLE_POINTERS_PS would cause RT + * writes (even though they're empty) to disturb later writes + * (probably due to RT cache) + * + * Our binding table only has one entry to the null surface. 
+ */ + uint32_t bt_offset; + cmd_buffer->generation_bt_state = + anv_cmd_buffer_alloc_binding_table(cmd_buffer, 1, &bt_offset); + if (cmd_buffer->generation_bt_state.map == NULL) { + VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer); + if (result != VK_SUCCESS) + return; + + /* Re-emit state base addresses so we get the new surface state base + * address before we start emitting binding tables etc. + */ + genX(cmd_buffer_emit_state_base_address)(cmd_buffer); + + cmd_buffer->generation_bt_state = + anv_cmd_buffer_alloc_binding_table(cmd_buffer, 1, &bt_offset); + assert(cmd_buffer->generation_bt_state.map != NULL); + } + + uint32_t *bt_map = cmd_buffer->generation_bt_state.map; + bt_map[0] = anv_bindless_state_for_binding_table( + cmd_buffer->device->null_surface_state).offset + bt_offset; + + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; +#endif + + cmd_buffer->state.gfx.vb_dirty = BITFIELD_BIT(0); + cmd_buffer->state.gfx.dirty |= ~(ANV_CMD_DIRTY_INDEX_BUFFER | + ANV_CMD_DIRTY_XFB_ENABLE); + cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + cmd_buffer->state.gfx.push_constant_stages = VK_SHADER_STAGE_FRAGMENT_BIT; + vk_dynamic_graphics_state_dirty_all(&cmd_buffer->vk.dynamic_graphics_state); +} + +static void +genX(cmd_buffer_emit_generate_draws_vertex)(struct anv_cmd_buffer *cmd_buffer, + uint32_t draw_count) +{ + struct anv_batch *batch = &cmd_buffer->generation_batch; + struct anv_state vs_data_state = + anv_cmd_buffer_alloc_dynamic_state( + cmd_buffer, 9 * sizeof(uint32_t), 32); + + float x0 = 0.0f, x1 = MIN2(draw_count, 8192); + float y0 = 0.0f, y1 = DIV_ROUND_UP(draw_count, 8192); + float z = 0.0f; + + float *vertices = vs_data_state.map; + vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */ + vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */ + vertices[6] = x0; vertices[7] = y0; vertices[8] = z; /* v2 */ + + uint32_t *dw = anv_batch_emitn(batch, + 1 + GENX(VERTEX_BUFFER_STATE_length), + GENX(3DSTATE_VERTEX_BUFFERS)); + GENX(VERTEX_BUFFER_STATE_pack)(batch, dw + 1, + &(struct GENX(VERTEX_BUFFER_STATE)) { + .VertexBufferIndex = 0, + .AddressModifyEnable = true, + .BufferStartingAddress = (struct anv_address) { + .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .offset = vs_data_state.offset, + }, + .BufferPitch = 3 * sizeof(float), + .BufferSize = 9 * sizeof(float), + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), +#if GFX_VER >= 12 + .L3BypassDisable = true, +#endif + }); +} + +static void +genX(cmd_buffer_emit_generated_push_data)(struct anv_cmd_buffer *cmd_buffer, + struct anv_state push_data_state) +{ + struct anv_batch *batch = &cmd_buffer->generation_batch; + struct anv_address push_data_addr = anv_state_pool_state_address( + &cmd_buffer->device->dynamic_state_pool, push_data_state); + + /* Don't use 3DSTATE_CONSTANT_ALL on Gfx12.0 due to Wa_16011448509 */ +#if GFX_VERx10 > 120 + const uint32_t num_dwords = GENX(3DSTATE_CONSTANT_ALL_length) + + GENX(3DSTATE_CONSTANT_ALL_DATA_length); + uint32_t *dw = + anv_batch_emitn(batch, num_dwords, + GENX(3DSTATE_CONSTANT_ALL), + .ShaderUpdateEnable = BITFIELD_BIT(MESA_SHADER_FRAGMENT), + .PointerBufferMask = 0x1, + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0)); + + GENX(3DSTATE_CONSTANT_ALL_DATA_pack)( + batch, dw + GENX(3DSTATE_CONSTANT_ALL_length), + &(struct GENX(3DSTATE_CONSTANT_ALL_DATA)) { + .PointerToConstantBuffer = push_data_addr, + .ConstantBufferReadLength = DIV_ROUND_UP(push_data_state.alloc_size, 32), + }); +#else + /* The 
Skylake PRM contains the following restriction: + * + * "The driver must ensure The following case does not occur + * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with + * buffer 3 read length equal to zero committed followed by a + * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to + * zero committed." + * + * To avoid this, we program the highest slot. + */ + anv_batch_emit(batch, GENX(3DSTATE_CONSTANT_PS), c) { + c.MOCS = anv_mocs(cmd_buffer->device, NULL, 0); + c.ConstantBody.ReadLength[3] = DIV_ROUND_UP(push_data_state.alloc_size, 32); + c.ConstantBody.Buffer[3] = push_data_addr; + } +#endif +} + +static struct anv_generated_indirect_params * +genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer, + struct anv_address generated_cmds_addr, + uint32_t generated_cmd_stride, + struct anv_address indirect_data_addr, + uint32_t indirect_data_stride, + struct anv_address draw_id_addr, + uint32_t item_base, + uint32_t item_count, + struct anv_address count_addr, + uint32_t max_count, + bool indexed) +{ + struct anv_batch *batch = &cmd_buffer->generation_batch; + + genX(cmd_buffer_emit_generate_draws_vertex)(cmd_buffer, item_count); + + struct anv_state push_data_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + sizeof(struct anv_generated_indirect_params), + ANV_UBO_ALIGNMENT); + + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + + struct anv_generated_indirect_params *push_data = push_data_state.map; + *push_data = (struct anv_generated_indirect_params) { + .draw = { + .draw_id_addr = anv_address_physical(draw_id_addr), + .indirect_data_addr = anv_address_physical(indirect_data_addr), + .indirect_data_stride = indirect_data_stride, + .flags = (indexed ? ANV_GENERATED_FLAG_INDEXED : 0) | + (cmd_buffer->state.conditional_render_enabled ? + ANV_GENERATED_FLAG_PREDICATED : 0) | + ((vs_prog_data->uses_firstvertex || + vs_prog_data->uses_baseinstance) ? + ANV_GENERATED_FLAG_BASE : 0) | + (vs_prog_data->uses_drawid ? ANV_GENERATED_FLAG_DRAWID : 0) | + (anv_mocs(cmd_buffer->device, indirect_data_addr.bo, + ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) | + ((generated_cmd_stride / 4) << 16), + .draw_base = item_base, + /* If count_addr is not NULL, we'll edit it through a the command + * streamer. + */ + .draw_count = anv_address_is_null(count_addr) ? max_count : 0, + .max_draw_count = max_count, + .instance_multiplier = pipeline->instance_multiplier, + }, + .indirect_data_addr = anv_address_physical(indirect_data_addr), + .generated_cmds_addr = anv_address_physical(generated_cmds_addr), + .draw_ids_addr = anv_address_physical(draw_id_addr), + }; + + if (!anv_address_is_null(count_addr)) { + /* Copy the draw count into the push constants so that the generation + * gets the value straight away and doesn't even need to access memory. + */ + struct mi_builder b; + mi_builder_init(&b, cmd_buffer->device->info, batch); + mi_memcpy(&b, + anv_address_add((struct anv_address) { + .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .offset = push_data_state.offset, + }, + offsetof(struct anv_generated_indirect_params, draw.draw_count)), + count_addr, 4); + + /* Make sure the memcpy landed for the generating draw call to pick up + * the value. + */ + anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) { + pc.CommandStreamerStallEnable = true; + } + } + + /* Only emit the data after the memcpy above. 
*/ + genX(cmd_buffer_emit_generated_push_data)(cmd_buffer, push_data_state); + +#if GFX_VER == 9 + /* Why are the push constants not flushed without a binding table + * update?? + */ + anv_batch_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), btp) { + btp.PointertoPSBindingTable = cmd_buffer->generation_bt_state.offset; + } +#endif + + anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) { + prim.VertexAccessType = SEQUENTIAL; + prim.PrimitiveTopologyType = _3DPRIM_RECTLIST; + prim.VertexCountPerInstance = 3; + prim.InstanceCount = 1; + } + + return push_data; +} + +static void +genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_buffer) +{ +#if GFX_VER >= 12 + anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) { + arb.PreParserDisableMask = true; + arb.PreParserDisable = true; + } +#endif + + anv_batch_emit_ensure_space(&cmd_buffer->generation_batch, 4); + + trace_intel_begin_generate_draws(&cmd_buffer->trace); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) { + bbs.AddressSpaceIndicator = ASI_PPGTT; + bbs.BatchBufferStartAddress = + anv_batch_current_address(&cmd_buffer->generation_batch); + } + + cmd_buffer->generation_return_addr = anv_batch_current_address(&cmd_buffer->batch); + + trace_intel_end_generate_draws(&cmd_buffer->trace); + + genX(cmd_buffer_emit_generate_draws_pipeline)(cmd_buffer); + +} + +static struct anv_address +genX(cmd_buffer_get_draw_id_addr)(struct anv_cmd_buffer *cmd_buffer, + uint32_t draw_id_count) +{ +#if GFX_VER >= 11 + return ANV_NULL_ADDRESS; +#else + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + if (!vs_prog_data->uses_drawid) + return ANV_NULL_ADDRESS; + + struct anv_state draw_id_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 4 * draw_id_count, 4); + return anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool, + draw_id_state); +#endif +} + +static uint32_t +genX(cmd_buffer_get_generated_draw_stride)(struct anv_cmd_buffer *cmd_buffer) +{ + /* With the extended parameters in 3DPRIMITIVE on Gfx11+ we can emit + * everything. Prior to this, we need to emit a couple of + * VERTEX_BUFFER_STATE. + */ +#if GFX_VER >= 11 + return 4 * GENX(3DPRIMITIVE_EXTENDED_length); +#else + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + + uint32_t len = 0; + + if (vs_prog_data->uses_firstvertex || + vs_prog_data->uses_baseinstance || + vs_prog_data->uses_drawid) { + len += 4; /* 3DSTATE_VERTEX_BUFFERS */ + + if (vs_prog_data->uses_firstvertex || + vs_prog_data->uses_baseinstance) + len += 4 * GENX(VERTEX_BUFFER_STATE_length); + + if (vs_prog_data->uses_drawid) + len += 4 * GENX(VERTEX_BUFFER_STATE_length); + } + + return len + 4 * GENX(3DPRIMITIVE_length); +#endif +} + +static void +genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer, + struct anv_generated_indirect_params *params) +{ + /* We don't know the end_addr until we have emitted all the generation + * draws. Go and edit the address of all the push parameters. 
+ */ + uint64_t end_addr = + anv_address_physical(anv_batch_current_address(&cmd_buffer->batch)); + while (params != NULL) { + params->draw.end_addr = end_addr; + params = params->prev; + } +} + +static void +genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer, + struct anv_address indirect_data_addr, + uint32_t indirect_data_stride, + struct anv_address count_addr, + uint32_t max_draw_count, + bool indexed) +{ + const bool start_generation_batch = + anv_address_is_null(cmd_buffer->generation_return_addr); + + genX(flush_pipeline_select_3d)(cmd_buffer); + + struct anv_address draw_id_addr = + genX(cmd_buffer_get_draw_id_addr)(cmd_buffer, max_draw_count); + +#if GFX_VER == 9 + /* Mark the VB-0 as using the entire dynamic state pool area, but only for + * the draw call starting the generation batch. All the following ones will + * use the same area. + */ + if (start_generation_batch) { + genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, 0, + (struct anv_address) { + .offset = DYNAMIC_STATE_POOL_MIN_ADDRESS, + }, + DYNAMIC_STATE_POOL_SIZE); + } + + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + + if (vs_prog_data->uses_baseinstance || + vs_prog_data->uses_firstvertex) { + /* We're using the indirect buffer directly to source base instance & + * first vertex values. Mark the entire area as used. + */ + genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX, + indirect_data_addr, + indirect_data_stride * max_draw_count); + } + + if (vs_prog_data->uses_drawid) { + /* Mark the whole draw id buffer as used. */ + genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX, + draw_id_addr, + sizeof(uint32_t) * max_draw_count); + } +#endif + + /* Apply the pipeline flush here so the indirect data is available for the + * generation shader. + */ + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + + if (start_generation_batch) + genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer); + + /* In order to have the vertex fetch gather the data we need to have a non + * 0 stride. It's possible to have a 0 stride given by the application when + * draw_count is 1, but we need a correct value for the + * VERTEX_BUFFER_STATE::BufferPitch, so ensure the caller set this + * correctly : + * + * Vulkan spec, vkCmdDrawIndirect: + * + * "If drawCount is less than or equal to one, stride is ignored." + */ + assert(indirect_data_stride > 0); + + if (cmd_buffer->state.conditional_render_enabled) + genX(cmd_emit_conditional_render_predicate)(cmd_buffer); + + /* Emit the 3D state in the main batch. */ + genX(cmd_buffer_flush_gfx_state)(cmd_buffer); + + const uint32_t draw_cmd_stride = + genX(cmd_buffer_get_generated_draw_stride)(cmd_buffer); + + struct anv_generated_indirect_params *last_params = NULL; + uint32_t item_base = 0; + while (item_base < max_draw_count) { + const uint32_t item_count = MIN2(max_draw_count - item_base, + MAX_GENERATED_DRAW_COUNT); + const uint32_t draw_cmd_size = item_count * draw_cmd_stride; + + /* Ensure we have enough contiguous space for all the draws so that the + * compute shader can edit all the 3DPRIMITIVEs from a single base + * address. + * + * TODO: we might have to split that if the amount of space is to large (at + * 1Mb?). 
+ */ + VkResult result = anv_batch_emit_ensure_space(&cmd_buffer->batch, + draw_cmd_size); + if (result != VK_SUCCESS) + return; + + struct anv_generated_indirect_params *params = + genX(cmd_buffer_emit_generate_draws)( + cmd_buffer, + anv_batch_current_address(&cmd_buffer->batch), + draw_cmd_stride, + anv_address_add(indirect_data_addr, + item_base * indirect_data_stride), + indirect_data_stride, + anv_address_add(draw_id_addr, 4 * item_base), + item_base, + item_count, + count_addr, + max_draw_count, + indexed); + + anv_batch_advance(&cmd_buffer->batch, draw_cmd_size); + + item_base += item_count; + + params->prev = last_params; + last_params = params; + } + + genX(cmd_buffer_rewrite_forward_end_addr)(cmd_buffer, last_params); + +#if GFX_VER == 9 + update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, indexed ? RANDOM : SEQUENTIAL); +#endif +} + +static void +genX(cmd_buffer_flush_generated_draws)(struct anv_cmd_buffer *cmd_buffer) +{ + /* No return address setup means we don't have to do anything */ + if (anv_address_is_null(cmd_buffer->generation_return_addr)) + return; + + struct anv_batch *batch = &cmd_buffer->generation_batch; + + /* Wait for all the generation vertex shader to generate the commands. */ + genX(emit_apply_pipe_flushes)(batch, + cmd_buffer->device, + _3D, +#if GFX_VER == 9 + ANV_PIPE_VF_CACHE_INVALIDATE_BIT | +#endif + ANV_PIPE_DATA_CACHE_FLUSH_BIT | + ANV_PIPE_CS_STALL_BIT, + NULL /* query_bits */); + +#if GFX_VER >= 12 + anv_batch_emit(batch, GENX(MI_ARB_CHECK), arb) { + arb.PreParserDisableMask = true; + arb.PreParserDisable = false; + } +#else + /* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter + * as the prefetch shouldn't follow the MI_BATCH_BUFFER_START. + */ +#endif + + /* Return to the main batch. */ + anv_batch_emit(batch, GENX(MI_BATCH_BUFFER_START), bbs) { + bbs.AddressSpaceIndicator = ASI_PPGTT; + bbs.BatchBufferStartAddress = cmd_buffer->generation_return_addr; + } + + cmd_buffer->generation_return_addr = ANV_NULL_ADDRESS; +} + +#endif /* GENX_CMD_GENERATED_INDIRECT_DRAW_H */ diff --git a/lib/mesa/src/intel/vulkan/genX_cmd_draw_helpers.h b/lib/mesa/src/intel/vulkan/genX_cmd_draw_helpers.h new file mode 100644 index 000000000..8db6b5e75 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/genX_cmd_draw_helpers.h @@ -0,0 +1,154 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef GENX_CMD_DRAW_HELPERS_H +#define GENX_CMD_DRAW_HELPERS_H + +#include <assert.h> +#include <stdbool.h> + +#include "anv_private.h" + +#if GFX_VER < 11 +static void +emit_vertex_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_address addr, + uint32_t size, uint32_t index) +{ + uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, 5, + GENX(3DSTATE_VERTEX_BUFFERS)); + + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, p + 1, + &(struct GENX(VERTEX_BUFFER_STATE)) { + .VertexBufferIndex = index, + .AddressModifyEnable = true, + .BufferPitch = 0, + .MOCS = anv_mocs(cmd_buffer->device, addr.bo, + ISL_SURF_USAGE_VERTEX_BUFFER_BIT), + .NullVertexBuffer = size == 0, + .BufferStartingAddress = addr, + .BufferSize = size + }); + +#if GFX_VER == 9 + genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, + index, addr, size); +#endif +} + +static void +emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_address addr) +{ + emit_vertex_bo(cmd_buffer, addr, addr.bo ? 8 : 0, ANV_SVGS_VB_INDEX); +} + +static void +emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, + uint32_t base_vertex, uint32_t base_instance) +{ + if (base_vertex == 0 && base_instance == 0) { + emit_base_vertex_instance_bo(cmd_buffer, ANV_NULL_ADDRESS); + return; + } + + struct anv_state id_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4); + + ((uint32_t *)id_state.map)[0] = base_vertex; + ((uint32_t *)id_state.map)[1] = base_instance; + + struct anv_address addr = + anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool, + id_state); + + emit_base_vertex_instance_bo(cmd_buffer, addr); +} + +static void +emit_draw_index(struct anv_cmd_buffer *cmd_buffer, uint32_t draw_index) +{ + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 4, 4); + + ((uint32_t *)state.map)[0] = draw_index; + + struct anv_address addr = + anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool, + state); + + emit_vertex_bo(cmd_buffer, addr, 4, ANV_DRAWID_VB_INDEX); +} +#endif /* GFX_VER <= 11 */ + +static void +update_dirty_vbs_for_gfx8_vb_flush(struct anv_cmd_buffer *cmd_buffer, + uint32_t access_type) +{ +#if GFX_VER == 9 + const struct vk_dynamic_graphics_state *dyn = + &cmd_buffer->vk.dynamic_graphics_state; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + + uint64_t vb_used = dyn->vi->bindings_valid; + if (vs_prog_data->uses_firstvertex || + vs_prog_data->uses_baseinstance) + vb_used |= 1ull << ANV_SVGS_VB_INDEX; + if (vs_prog_data->uses_drawid) + vb_used |= 1ull << ANV_DRAWID_VB_INDEX; + + genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(cmd_buffer, + access_type, + vb_used); +#endif +} + +#if GFX_VER < 11 +ALWAYS_INLINE static void +cmd_buffer_emit_vertex_constants_and_flush(struct anv_cmd_buffer *cmd_buffer, + const struct brw_vs_prog_data *vs_prog_data, + uint32_t base_vertex, + uint32_t base_instance, + uint32_t draw_id, + bool force_flush) +{ + bool emitted = false; + if (vs_prog_data->uses_firstvertex || + vs_prog_data->uses_baseinstance) { + emit_base_vertex_instance(cmd_buffer, base_vertex, base_instance); + emitted = true; + } + if (vs_prog_data->uses_drawid) { + emit_draw_index(cmd_buffer, draw_id); + emitted = true; + } + /* Emitting draw index or vertex index BOs may result in needing + * additional VF cache flushes. 
+ */ + if (emitted || force_flush) + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); +} +#endif + +#endif /* GENX_CMD_DRAW_HELPERS_H */ diff --git a/lib/mesa/src/intel/vulkan/genX_video.c b/lib/mesa/src/intel/vulkan/genX_video.c new file mode 100644 index 000000000..0192d8703 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/genX_video.c @@ -0,0 +1,447 @@ +/* + * Copyright © 2021 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" + +void +genX(CmdBeginVideoCodingKHR)(VkCommandBuffer commandBuffer, + const VkVideoBeginCodingInfoKHR *pBeginInfo) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_video_session, vid, pBeginInfo->videoSession); + ANV_FROM_HANDLE(anv_video_session_params, params, pBeginInfo->videoSessionParameters); + + cmd_buffer->video.vid = vid; + cmd_buffer->video.params = params; +} + +void +genX(CmdControlVideoCodingKHR)(VkCommandBuffer commandBuffer, + const VkVideoCodingControlInfoKHR *pCodingControlInfo) +{ + +} + +void +genX(CmdEndVideoCodingKHR)(VkCommandBuffer commandBuffer, + const VkVideoEndCodingInfoKHR *pEndCodingInfo) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->video.vid = NULL; + cmd_buffer->video.params = NULL; +} + +static void +anv_h264_decode_video(struct anv_cmd_buffer *cmd_buffer, + const VkVideoDecodeInfoKHR *frame_info) +{ + ANV_FROM_HANDLE(anv_buffer, src_buffer, frame_info->srcBuffer); + struct anv_video_session *vid = cmd_buffer->video.vid; + struct anv_video_session_params *params = cmd_buffer->video.params; + const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info = + vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR); + const StdVideoH264SequenceParameterSet *sps = vk_video_find_h264_dec_std_sps(¶ms->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id); + const StdVideoH264PictureParameterSet *pps = vk_video_find_h264_dec_std_pps(¶ms->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), flush) { + flush.DWordLength = 2; + flush.VideoPipelineCacheInvalidate = 1; + }; + +#if GFX_VER >= 12 + anv_batch_emit(&cmd_buffer->batch, GENX(MI_FORCE_WAKEUP), wake) { + wake.MFXPowerWellControl = 1; + wake.MaskBits = 768; + } + + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) { + mfx.MFXSyncControlFlag = 1; + } +#endif + + 
anv_batch_emit(&cmd_buffer->batch, GENX(MFX_PIPE_MODE_SELECT), sel) { + sel.StandardSelect = SS_AVC; + sel.CodecSelect = Decode; + sel.DecoderShortFormatMode = ShortFormatDriverInterface; + sel.DecoderModeSelect = VLDMode; // Hardcoded + + sel.PreDeblockingOutputEnable = 0; + sel.PostDeblockingOutputEnable = 1; + } + +#if GFX_VER >= 12 + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) { + mfx.MFXSyncControlFlag = 1; + } +#endif + + const struct anv_image_view *iv = anv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding); + const struct anv_image *img = iv->image; + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_SURFACE_STATE), ss) { + ss.Width = img->vk.extent.width - 1; + ss.Height = img->vk.extent.height - 1; + ss.SurfaceFormat = PLANAR_420_8; // assert on this? + ss.InterleaveChroma = 1; + ss.SurfacePitch = img->planes[0].primary_surface.isl.row_pitch_B - 1; + ss.TiledSurface = img->planes[0].primary_surface.isl.tiling != ISL_TILING_LINEAR; + ss.TileWalk = TW_YMAJOR; + + ss.YOffsetforUCb = ss.YOffsetforVCr = + img->planes[1].primary_surface.memory_range.offset / img->planes[0].primary_surface.isl.row_pitch_B; + } + + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_PIPE_BUF_ADDR_STATE), buf) { + bool use_pre_deblock = false; + if (use_pre_deblock) { + buf.PreDeblockingDestinationAddress = anv_image_address(img, + &img->planes[0].primary_surface.memory_range); + } else { + buf.PostDeblockingDestinationAddress = anv_image_address(img, + &img->planes[0].primary_surface.memory_range); + } + buf.PreDeblockingDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.PreDeblockingDestinationAddress.bo, 0), + }; + buf.PostDeblockingDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.PostDeblockingDestinationAddress.bo, 0), + }; + + buf.IntraRowStoreScratchBufferAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_INTRA_ROW_STORE].mem->bo, vid->vid_mem[ANV_VID_MEM_H264_INTRA_ROW_STORE].offset }; + buf.IntraRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.IntraRowStoreScratchBufferAddress.bo, 0), + }; + buf.DeblockingFilterRowStoreScratchAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE].mem->bo, vid->vid_mem[ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE].offset }; + buf.DeblockingFilterRowStoreScratchAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterRowStoreScratchAddress.bo, 0), + }; + buf.MBStatusBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + buf.MBILDBStreamOutBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + buf.SecondMBILDBStreamOutBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + buf.ScaledReferenceSurfaceAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + buf.OriginalUncompressedPictureSourceAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + buf.StreamOutDataDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + + struct anv_bo *ref_bo = NULL; + for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) { + const 
struct anv_image_view *ref_iv = anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding); + int idx = frame_info->pReferenceSlots[i].slotIndex; + buf.ReferencePictureAddress[idx] = anv_image_address(ref_iv->image, + &ref_iv->image->planes[0].primary_surface.memory_range); + + if (i == 0) { + ref_bo = ref_iv->image->bindings[0].address.bo; + } + } + buf.ReferencePictureAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, ref_bo, 0), + }; + } + + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_IND_OBJ_BASE_ADDR_STATE), index_obj) { + index_obj.MFXIndirectBitstreamObjectAddress = anv_address_add(src_buffer->address, + frame_info->srcBufferOffset & ~4095); + index_obj.MFXIndirectBitstreamObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, src_buffer->address.bo, 0), + }; + index_obj.MFXIndirectMVObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + index_obj.MFDIndirectITCOEFFObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + index_obj.MFDIndirectITDBLKObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + index_obj.MFCIndirectPAKBSEObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + } + + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_BSP_BUF_BASE_ADDR_STATE), bsp) { + bsp.BSDMPCRowStoreScratchBufferAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].mem->bo, + vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].offset }; + + bsp.BSDMPCRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, bsp.BSDMPCRowStoreScratchBufferAddress.bo, 0), + }; + bsp.MPRRowStoreScratchBufferAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_MPR_ROW_SCRATCH].mem->bo, + vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].offset }; + + bsp.MPRRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, bsp.MPRRowStoreScratchBufferAddress.bo, 0), + }; + bsp.BitplaneReadBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + } + + anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_DPB_STATE), avc_dpb) { + for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) { + const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot = + vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR); + const StdVideoDecodeH264ReferenceInfo *ref_info = dpb_slot->pStdReferenceInfo; + int idx = frame_info->pReferenceSlots[i].slotIndex; + avc_dpb.NonExistingFrame[idx] = ref_info->flags.is_non_existing; + avc_dpb.LongTermFrame[idx] = ref_info->flags.used_for_long_term_reference; + if (!ref_info->flags.top_field_flag && !ref_info->flags.bottom_field_flag) + avc_dpb.UsedforReference[idx] = 3; + else + avc_dpb.UsedforReference[idx] = ref_info->flags.top_field_flag | (ref_info->flags.bottom_field_flag << 1); + avc_dpb.LTSTFrameNumberList[idx] = ref_info->FrameNum; + } + } + + anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_PICID_STATE), picid) { + picid.PictureIDRemappingDisable = true; + } + + uint32_t pic_height = sps->pic_height_in_map_units_minus1 + 1; + if (!sps->flags.frame_mbs_only_flag) + pic_height *= 2; + 
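   /* Worked example with hypothetical SPS values: a 1920x1088 stream coded
    * with frame_mbs_only_flag = 1 carries pic_width_in_mbs_minus1 = 119 and
    * pic_height_in_map_units_minus1 = 67, giving a width of 120 macroblocks,
    * pic_height = 68 and FrameSize = 120 * 68 = 8160 macroblocks. When
    * frame_mbs_only_flag is 0, the frame height in macroblocks is twice the
    * map-unit count, hence the doubling above; MFX_AVC_IMG_STATE then takes
    * the width and height in minus-one form.
    */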
anv_batch_emit(&cmd_buffer->batch, GENX(MFX_AVC_IMG_STATE), avc_img) { + avc_img.FrameWidth = sps->pic_width_in_mbs_minus1; + avc_img.FrameHeight = pic_height - 1; + avc_img.FrameSize = (sps->pic_width_in_mbs_minus1 + 1) * pic_height; + + if (!h264_pic_info->pStdPictureInfo->flags.field_pic_flag) + avc_img.ImageStructure = FramePicture; + else if (h264_pic_info->pStdPictureInfo->flags.bottom_field_flag) + avc_img.ImageStructure = BottomFieldPicture; + else + avc_img.ImageStructure = TopFieldPicture; + + avc_img.WeightedBiPredictionIDC = pps->weighted_bipred_idc; + avc_img.WeightedPredictionEnable = pps->flags.weighted_pred_flag; + avc_img.FirstChromaQPOffset = pps->chroma_qp_index_offset; + avc_img.SecondChromaQPOffset = pps->second_chroma_qp_index_offset; + avc_img.FieldPicture = h264_pic_info->pStdPictureInfo->flags.field_pic_flag; + avc_img.MBAFFMode = (sps->flags.mb_adaptive_frame_field_flag && + !h264_pic_info->pStdPictureInfo->flags.field_pic_flag); + avc_img.FrameMBOnly = sps->flags.frame_mbs_only_flag; + avc_img._8x8IDCTTransformMode = pps->flags.transform_8x8_mode_flag; + avc_img.Direct8x8Inference = sps->flags.direct_8x8_inference_flag; + avc_img.ConstrainedIntraPrediction = pps->flags.constrained_intra_pred_flag; + avc_img.NonReferencePicture = !h264_pic_info->pStdPictureInfo->flags.is_reference; + avc_img.EntropyCodingSyncEnable = pps->flags.entropy_coding_mode_flag; + avc_img.ChromaFormatIDC = sps->chroma_format_idc; + avc_img.TrellisQuantizationChromaDisable = true; + avc_img.NumberofReferenceFrames = frame_info->referenceSlotCount; + avc_img.NumberofActiveReferencePicturesfromL0 = pps->num_ref_idx_l0_default_active_minus1 + 1; + avc_img.NumberofActiveReferencePicturesfromL1 = pps->num_ref_idx_l1_default_active_minus1 + 1; + avc_img.InitialQPValue = pps->pic_init_qp_minus26; + avc_img.PicOrderPresent = pps->flags.bottom_field_pic_order_in_frame_present_flag; + avc_img.DeltaPicOrderAlwaysZero = sps->flags.delta_pic_order_always_zero_flag; + avc_img.PicOrderCountType = sps->pic_order_cnt_type; + avc_img.DeblockingFilterControlPresent = pps->flags.deblocking_filter_control_present_flag; + avc_img.RedundantPicCountPresent = pps->flags.redundant_pic_cnt_present_flag; + avc_img.Log2MaxFrameNumber = sps->log2_max_frame_num_minus4; + avc_img.Log2MaxPicOrderCountLSB = sps->log2_max_pic_order_cnt_lsb_minus4; + avc_img.CurrentPictureFrameNumber = h264_pic_info->pStdPictureInfo->frame_num; + } + + if (pps->flags.pic_scaling_matrix_present_flag) { + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) { + qm.DWordLength = 16; + qm.AVC = AVC_4x4_Intra_MATRIX; + for (unsigned m = 0; m < 3; m++) + for (unsigned q = 0; q < 16; q++) + qm.ForwardQuantizerMatrix[m * 16 + q] = pps->pScalingLists->ScalingList4x4[m][q]; + } + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) { + qm.DWordLength = 16; + qm.AVC = AVC_4x4_Inter_MATRIX; + for (unsigned m = 0; m < 3; m++) + for (unsigned q = 0; q < 16; q++) + qm.ForwardQuantizerMatrix[m * 16 + q] = pps->pScalingLists->ScalingList4x4[m + 3][q]; + } + if (pps->flags.transform_8x8_mode_flag) { + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) { + qm.DWordLength = 16; + qm.AVC = AVC_8x8_Intra_MATRIX; + for (unsigned q = 0; q < 64; q++) + qm.ForwardQuantizerMatrix[q] = pps->pScalingLists->ScalingList8x8[0][q]; + } + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) { + qm.DWordLength = 16; + qm.AVC = AVC_8x8_Inter_MATRIX; + for (unsigned q = 0; q < 64; q++) + qm.ForwardQuantizerMatrix[q] = 
pps->pScalingLists->ScalingList8x8[3][q]; + } + } + } else if (sps->flags.seq_scaling_matrix_present_flag) { + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) { + qm.DWordLength = 16; + qm.AVC = AVC_4x4_Intra_MATRIX; + for (unsigned m = 0; m < 3; m++) + for (unsigned q = 0; q < 16; q++) + qm.ForwardQuantizerMatrix[m * 16 + q] = sps->pScalingLists->ScalingList4x4[m][q]; + } + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) { + qm.DWordLength = 16; + qm.AVC = AVC_4x4_Inter_MATRIX; + for (unsigned m = 0; m < 3; m++) + for (unsigned q = 0; q < 16; q++) + qm.ForwardQuantizerMatrix[m * 16 + q] = sps->pScalingLists->ScalingList4x4[m + 3][q]; + } + if (pps->flags.transform_8x8_mode_flag) { + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) { + qm.DWordLength = 16; + qm.AVC = AVC_8x8_Intra_MATRIX; + for (unsigned q = 0; q < 64; q++) + qm.ForwardQuantizerMatrix[q] = sps->pScalingLists->ScalingList8x8[0][q]; + } + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) { + qm.DWordLength = 16; + qm.AVC = AVC_8x8_Inter_MATRIX; + for (unsigned q = 0; q < 64; q++) + qm.ForwardQuantizerMatrix[q] = sps->pScalingLists->ScalingList8x8[3][q]; + } + } + } else { + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) { + qm.DWordLength = 16; + qm.AVC = AVC_4x4_Intra_MATRIX; + for (unsigned q = 0; q < 3 * 16; q++) + qm.ForwardQuantizerMatrix[q] = 0x10; + } + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) { + qm.DWordLength = 16; + qm.AVC = AVC_4x4_Inter_MATRIX; + for (unsigned q = 0; q < 3 * 16; q++) + qm.ForwardQuantizerMatrix[q] = 0x10; + } + if (pps->flags.transform_8x8_mode_flag) { + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) { + qm.DWordLength = 16; + qm.AVC = AVC_8x8_Intra_MATRIX; + for (unsigned q = 0; q < 64; q++) + qm.ForwardQuantizerMatrix[q] = 0x10; + } + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) { + qm.DWordLength = 16; + qm.AVC = AVC_8x8_Inter_MATRIX; + for (unsigned q = 0; q < 64; q++) + qm.ForwardQuantizerMatrix[q] = 0x10; + } + } + } + + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_AVC_DIRECTMODE_STATE), avc_directmode) { + /* bind reference frame DMV */ + struct anv_bo *dmv_bo = NULL; + for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) { + int idx = frame_info->pReferenceSlots[i].slotIndex; + const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot = + vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR); + const struct anv_image_view *ref_iv = anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding); + const StdVideoDecodeH264ReferenceInfo *ref_info = dpb_slot->pStdReferenceInfo; + avc_directmode.DirectMVBufferAddress[idx] = anv_image_address(ref_iv->image, + &ref_iv->image->vid_dmv_top_surface); + if (i == 0) { + dmv_bo = ref_iv->image->bindings[0].address.bo; + } + avc_directmode.POCList[2 * idx] = ref_info->PicOrderCnt[0]; + avc_directmode.POCList[2 * idx + 1] = ref_info->PicOrderCnt[1]; + } + avc_directmode.DirectMVBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, dmv_bo, 0), + }; + + avc_directmode.DirectMVBufferWriteAddress = anv_image_address(img, + &img->vid_dmv_top_surface); + avc_directmode.DirectMVBufferWriteAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, img->bindings[0].address.bo, 0), + }; + avc_directmode.POCList[32] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0]; + avc_directmode.POCList[33] = 
h264_pic_info->pStdPictureInfo->PicOrderCnt[1]; + } + + uint32_t buffer_offset = frame_info->srcBufferOffset & 4095; +#define HEADER_OFFSET 3 + for (unsigned s = 0; s < h264_pic_info->sliceCount; s++) { + bool last_slice = s == (h264_pic_info->sliceCount - 1); + uint32_t current_offset = h264_pic_info->pSliceOffsets[s]; + uint32_t this_end; + if (!last_slice) { + uint32_t next_offset = h264_pic_info->pSliceOffsets[s + 1]; + uint32_t next_end = h264_pic_info->pSliceOffsets[s + 2]; + if (s == h264_pic_info->sliceCount - 2) + next_end = frame_info->srcBufferRange; + anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_SLICEADDR), sliceaddr) { + sliceaddr.IndirectBSDDataLength = next_end - next_offset - HEADER_OFFSET; + /* start decoding after the 3-byte header. */ + sliceaddr.IndirectBSDDataStartAddress = buffer_offset + next_offset + HEADER_OFFSET; + }; + this_end = next_offset; + } else + this_end = frame_info->srcBufferRange; + anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_BSD_OBJECT), avc_bsd) { + avc_bsd.IndirectBSDDataLength = this_end - current_offset - HEADER_OFFSET; + /* start decoding after the 3-byte header. */ + avc_bsd.IndirectBSDDataStartAddress = buffer_offset + current_offset + HEADER_OFFSET; + avc_bsd.InlineData.LastSlice = last_slice; + avc_bsd.InlineData.FixPrevMBSkipped = 1; + avc_bsd.InlineData.IntraPredictionErrorControl = 1; + avc_bsd.InlineData.Intra8x84x4PredictionErrorConcealmentControl = 1; + avc_bsd.InlineData.ISliceConcealmentMode = 1; + }; + } +} + +void +genX(CmdDecodeVideoKHR)(VkCommandBuffer commandBuffer, + const VkVideoDecodeInfoKHR *frame_info) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + switch (cmd_buffer->video.vid->vk.op) { + case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: + anv_h264_decode_video(cmd_buffer, frame_info); + break; + default: + assert(0); + } +} + +#ifdef VK_ENABLE_BETA_EXTENSIONS +void +genX(CmdEncodeVideoKHR)(VkCommandBuffer commandBuffer, + const VkVideoEncodeInfoKHR *pEncodeInfo) +{ +} +#endif diff --git a/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c b/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c index 34337c21f..f9c13954d 100644 --- a/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c +++ b/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c @@ -55,7 +55,9 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable) pc.RenderTargetCacheFlushEnable = true; #if GFX_VER >= 12 pc.TileCacheFlushEnable = true; +#endif +#if INTEL_NEEDS_WA_1409600907 /* Wa_1409600907: "PIPE_CONTROL with Depth Stall Enable bit must * be set with any PIPE_CONTROL with Depth Flush Enable bit set. */ @@ -209,6 +211,24 @@ want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer, wm_prog_data->computed_depth_mode != PSCDEPTH_OFF; } +static UNUSED bool +geom_or_tess_prim_id_used(struct anv_graphics_pipeline *pipeline) +{ + const struct brw_tcs_prog_data *tcs_prog_data = + anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL) ? + get_tcs_prog_data(pipeline) : NULL; + const struct brw_tes_prog_data *tes_prog_data = + anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ? + get_tes_prog_data(pipeline) : NULL; + const struct brw_gs_prog_data *gs_prog_data = + anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY) ? 
+ get_gs_prog_data(pipeline) : NULL; + + return (tcs_prog_data && tcs_prog_data->include_primitive_id) || + (tes_prog_data && tes_prog_data->include_primitive_id) || + (gs_prog_data && gs_prog_data->include_primitive_id); +} + static void genX(cmd_emit_te)(struct anv_cmd_buffer *cmd_buffer) { @@ -230,7 +250,21 @@ genX(cmd_emit_te)(struct anv_cmd_buffer *cmd_buffer) te.MaximumTessellationFactorOdd = 63.0; te.MaximumTessellationFactorNotOdd = 64.0; #if GFX_VERx10 >= 125 - te.TessellationDistributionMode = TEDMODE_RR_FREE; + if (intel_needs_workaround(cmd_buffer->device->info, 22012785325)) + te.TessellationDistributionMode = TEDMODE_RR_STRICT; + else + te.TessellationDistributionMode = TEDMODE_RR_FREE; + + if (intel_needs_workaround(cmd_buffer->device->info, 14015297576)) { + /* Wa_14015297576: + * + * Disable Tessellation Distribution when primitive Id is enabled. + */ + if (pipeline->primitive_id_override || + geom_or_tess_prim_id_used(pipeline)) + te.TessellationDistributionMode = TEDMODE_OFF; + } + te.TessellationDistributionLevel = TEDLEVEL_PATCH; /* 64_TRIANGLES */ te.SmallPatchThreshold = 3; @@ -315,7 +349,8 @@ genX(emit_shading_rate)(struct anv_batch *batch, const struct vk_fragment_shading_rate_state *fsr) { const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); - const bool cps_enable = wm_prog_data && wm_prog_data->per_coarse_pixel_dispatch; + const bool cps_enable = wm_prog_data && + brw_wm_prog_data_is_coarse(wm_prog_data, 0); #if GFX_VER == 11 anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) { @@ -392,6 +427,36 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) &cmd_buffer->vk.dynamic_graphics_state; if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI)) { + const uint32_t ve_count = + pipeline->vs_input_elements + pipeline->svgs_count; + const uint32_t num_dwords = 1 + 2 * MAX2(1, ve_count); + uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GENX(3DSTATE_VERTEX_ELEMENTS)); + + if (p) { + if (ve_count == 0) { + memcpy(p + 1, cmd_buffer->device->empty_vs_input, + sizeof(cmd_buffer->device->empty_vs_input)); + } else if (ve_count == pipeline->vertex_input_elems) { + /* MESA_VK_DYNAMIC_VI is not dynamic for this pipeline, so + * everything is in pipeline->vertex_input_data and we can just + * memcpy + */ + memcpy(p + 1, pipeline->vertex_input_data, 4 * 2 * ve_count); + } else { + /* Use dyn->vi to emit the dynamic VERTEX_ELEMENT_STATE input. */ + genX(emit_vertex_input)(&cmd_buffer->batch, p + 1, + pipeline, dyn->vi); + /* Then append the VERTEX_ELEMENT_STATE for the draw parameters */ + memcpy(p + 1 + 2 * pipeline->vs_input_elements, + pipeline->vertex_input_data, + 4 * 2 * pipeline->vertex_input_elems); + } + } + } + + if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) { genX(cmd_emit_te)(cmd_buffer); } @@ -650,8 +715,12 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) #endif if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations && - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS)) - genX(emit_sample_pattern)(&cmd_buffer->batch, dyn->ms.sample_locations); + (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE))) { + genX(emit_sample_pattern)(&cmd_buffer->batch, + dyn->ms.sample_locations_enable ? 
+ dyn->ms.sample_locations : NULL); + } if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) || diff --git a/lib/mesa/src/intel/vulkan/grl/genX_grl.h b/lib/mesa/src/intel/vulkan/grl/genX_grl.h index 6617e210b..57aefa72d 100644 --- a/lib/mesa/src/intel/vulkan/grl/genX_grl.h +++ b/lib/mesa/src/intel/vulkan/grl/genX_grl.h @@ -24,13 +24,15 @@ #ifndef ANV_GRL_H #define ANV_GRL_H +#include "grl/grl_cl_kernel.h" +#include "genxml/gen_macros.h" + #ifdef __cplusplus extern "C" { #endif -#include "anv_private.h" -#include "grl/grl_cl_kernel.h" -#include "genxml/gen_macros.h" +struct anv_cmd_buffer; +struct anv_kernel_arg; void genX(grl_dispatch)(struct anv_cmd_buffer *cmd_buffer, @@ -42,6 +44,9 @@ genX(grl_dispatch)(struct anv_cmd_buffer *cmd_buffer, void genX(grl_load_rt_uuid)(uint8_t *out_uuid); +uint32_t +genX(grl_max_scratch_size)(void); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/lib/mesa/src/intel/vulkan/grl/genX_grl_dispatch.c b/lib/mesa/src/intel/vulkan/grl/genX_grl_dispatch.c index a320e6faa..eff7c4074 100644 --- a/lib/mesa/src/intel/vulkan/grl/genX_grl_dispatch.c +++ b/lib/mesa/src/intel/vulkan/grl/genX_grl_dispatch.c @@ -21,6 +21,7 @@ * IN THE SOFTWARE. */ +#include "anv_private.h" #include "genX_grl.h" static struct anv_shader_bin * @@ -89,3 +90,19 @@ genX(grl_dispatch)(struct anv_cmd_buffer *cmd_buffer, genX(cmd_buffer_dispatch_kernel)(cmd_buffer, &ak, global_size, arg_count, args); } + +uint32_t +genX(grl_max_scratch_size)(void) +{ + uint32_t scratch_size = 0; + + for (uint32_t i = 0; i < GRL_CL_KERNEL_MAX; i++) { + struct brw_kernel kernel_data; + genX(grl_get_cl_kernel)(&kernel_data, i); + + scratch_size = MAX2(kernel_data.prog_data.base.total_scratch, + scratch_size); + } + + return scratch_size; +} diff --git a/lib/mesa/src/intel/vulkan/grl/genX_grl_uuid.cpp b/lib/mesa/src/intel/vulkan/grl/genX_grl_uuid.cpp index 9f4335892..cf6b425fe 100644 --- a/lib/mesa/src/intel/vulkan/grl/genX_grl_uuid.cpp +++ b/lib/mesa/src/intel/vulkan/grl/genX_grl_uuid.cpp @@ -24,15 +24,16 @@ #include <assert.h> #include <string.h> +#include "genX_grl.h" #include "include/GRLGen12.h" #include "vulkan/vulkan_core.h" extern "C" void -gfx125_grl_load_rt_uuid(uint8_t *out_uuid); +genX(grl_load_rt_uuid)(uint8_t *out_uuid); extern "C" void -gfx125_grl_load_rt_uuid(uint8_t *out_uuid) +genX(grl_load_rt_uuid)(uint8_t *out_uuid) { assert(sizeof(GRL::RTAS::GEN12::BVH_MAGIC) == VK_UUID_SIZE); memcpy(out_uuid, GRL::RTAS::GEN12::BVH_MAGIC, VK_UUID_SIZE); diff --git a/lib/mesa/src/intel/vulkan/grl/grl_cl_kernel_gen.py b/lib/mesa/src/intel/vulkan/grl/grl_cl_kernel_gen.py index 4b0b8babd..c7efeff53 100644 --- a/lib/mesa/src/intel/vulkan/grl/grl_cl_kernel_gen.py +++ b/lib/mesa/src/intel/vulkan/grl/grl_cl_kernel_gen.py @@ -36,13 +36,13 @@ TEMPLATE_H = Template(COPYRIGHT + """ #ifndef GRL_CL_KERNEL_H #define GRL_CL_KERNEL_H +#include "genxml/gen_macros.h" +#include "compiler/brw_kernel.h" + #ifdef __cplusplus extern "C" { #endif -#include "genxml/gen_macros.h" -#include "compiler/brw_kernel.h" - enum grl_cl_kernel { % for k in kernels: GRL_CL_KERNEL_${k.upper()}, @@ -50,7 +50,7 @@ enum grl_cl_kernel { GRL_CL_KERNEL_MAX, }; -const char *grl_cl_kernel_name(enum grl_cl_kernel kernel); +const char *genX(grl_cl_kernel_name)(enum grl_cl_kernel kernel); const char *genX(grl_get_cl_kernel_sha1)(enum grl_cl_kernel id); @@ -73,7 +73,7 @@ TEMPLATE_C = Template(COPYRIGHT + """ % endfor const char * -grl_cl_kernel_name(enum grl_cl_kernel kernel) 
+genX(grl_cl_kernel_name)(enum grl_cl_kernel kernel) { switch (kernel) { % for k in kernels: diff --git a/lib/mesa/src/intel/vulkan/grl/grl_metakernel_gen.py b/lib/mesa/src/intel/vulkan/grl/grl_metakernel_gen.py index 029ecf30f..0a14113a3 100644 --- a/lib/mesa/src/intel/vulkan/grl/grl_metakernel_gen.py +++ b/lib/mesa/src/intel/vulkan/grl/grl_metakernel_gen.py @@ -866,7 +866,7 @@ C_PROLOGUE = COPYRIGHT + ''' #include "genxml/gen_macros.h" #include "genxml/genX_pack.h" -#include "genxml/gen_rt_pack.h" +#include "genxml/genX_rt_pack.h" /* We reserve : * - GPR 14 for secondary command buffer returns diff --git a/lib/mesa/src/intel/vulkan/grl/meson.build b/lib/mesa/src/intel/vulkan/grl/meson.build index 979414c07..c0056b349 100644 --- a/lib/mesa/src/intel/vulkan/grl/meson.build +++ b/lib/mesa/src/intel/vulkan/grl/meson.build @@ -142,6 +142,7 @@ foreach t : [['125', 'gfx125', 'dg2']] # without modifying grl source code, remove # if fixed there ], + env: ['MESA_SHADER_CACHE_DISABLE=true'], depends : [prog_intel_clc] ) endforeach @@ -165,11 +166,11 @@ foreach t : [['125', 'gfx125', 'dg2']] inc_intel, ], c_args : [ - no_override_init_args, c_sse2_args, + no_override_init_args, sse2_args, '-DGFX_VERx10=@0@'.format(verX10), ], cpp_args : [ - no_override_init_args, c_sse2_args, + sse2_args, '-DGFX_VERx10=@0@'.format(verX10), ], dependencies : [ @@ -196,7 +197,6 @@ libgrl = static_library( ], link_whole : [grl_genX_libs], dependencies : [libgrl_deps, idep_anv_headers], - install : true, ) idep_grl = declare_dependency( link_with : libgrl, diff --git a/lib/mesa/src/intel/vulkan/i915/anv_batch_chain.c b/lib/mesa/src/intel/vulkan/i915/anv_batch_chain.c new file mode 100644 index 000000000..ff6e7d1ae --- /dev/null +++ b/lib/mesa/src/intel/vulkan/i915/anv_batch_chain.c @@ -0,0 +1,813 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "i915/anv_batch_chain.h" +#include "anv_private.h" +#include "anv_measure.h" + +#include "perf/intel_perf.h" +#include "util/u_debug.h" + +#include "drm-uapi/i915_drm.h" + +struct anv_execbuf { + struct drm_i915_gem_execbuffer2 execbuf; + + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; + + struct drm_i915_gem_exec_object2 * objects; + uint32_t bo_count; + uint32_t bo_array_length; + struct anv_bo ** bos; + + uint32_t syncobj_count; + uint32_t syncobj_array_length; + struct drm_i915_gem_exec_fence * syncobjs; + uint64_t * syncobj_values; + + uint32_t cmd_buffer_count; + struct anv_query_pool *perf_query_pool; + + const VkAllocationCallbacks * alloc; + VkSystemAllocationScope alloc_scope; + + int perf_query_pass; +}; + +static void +anv_execbuf_finish(struct anv_execbuf *exec) +{ + vk_free(exec->alloc, exec->syncobjs); + vk_free(exec->alloc, exec->syncobj_values); + vk_free(exec->alloc, exec->objects); + vk_free(exec->alloc, exec->bos); +} + +static void +anv_execbuf_add_ext(struct anv_execbuf *exec, + uint32_t ext_name, + struct i915_user_extension *ext) +{ + __u64 *iter = &exec->execbuf.cliprects_ptr; + + exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS; + + while (*iter != 0) { + iter = (__u64 *) &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension; + } + + ext->name = ext_name; + + *iter = (uintptr_t) ext; +} + +static VkResult +anv_execbuf_add_bo_bitset(struct anv_device *device, + struct anv_execbuf *exec, + uint32_t dep_words, + BITSET_WORD *deps, + uint32_t extra_flags); + +static VkResult +anv_execbuf_add_bo(struct anv_device *device, + struct anv_execbuf *exec, + struct anv_bo *bo, + struct anv_reloc_list *relocs, + uint32_t extra_flags) +{ + struct drm_i915_gem_exec_object2 *obj = NULL; + + if (bo->exec_obj_index < exec->bo_count && + exec->bos[bo->exec_obj_index] == bo) + obj = &exec->objects[bo->exec_obj_index]; + + if (obj == NULL) { + /* We've never seen this one before. Add it to the list and assign + * an id that we can use later. + */ + if (exec->bo_count >= exec->bo_array_length) { + uint32_t new_len = exec->objects ? 
exec->bo_array_length * 2 : 64; + + struct drm_i915_gem_exec_object2 *new_objects = + vk_realloc(exec->alloc, exec->objects, + new_len * sizeof(*new_objects), 8, exec->alloc_scope); + if (new_objects == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + exec->objects = new_objects; + + struct anv_bo **new_bos = + vk_realloc(exec->alloc, exec->bos, new_len * sizeof(*new_bos), 8, + exec->alloc_scope); + if (new_bos == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + exec->bos = new_bos; + exec->bo_array_length = new_len; + } + + assert(exec->bo_count < exec->bo_array_length); + + bo->exec_obj_index = exec->bo_count++; + obj = &exec->objects[bo->exec_obj_index]; + exec->bos[bo->exec_obj_index] = bo; + + obj->handle = bo->gem_handle; + obj->relocation_count = 0; + obj->relocs_ptr = 0; + obj->alignment = 0; + obj->offset = bo->offset; + obj->flags = bo->flags | extra_flags; + obj->rsvd1 = 0; + obj->rsvd2 = 0; + } + + if (extra_flags & EXEC_OBJECT_WRITE) { + obj->flags |= EXEC_OBJECT_WRITE; + obj->flags &= ~EXEC_OBJECT_ASYNC; + } + + if (relocs != NULL) { + return anv_execbuf_add_bo_bitset(device, exec, relocs->dep_words, + relocs->deps, extra_flags); + } + + return VK_SUCCESS; +} + +/* Add BO dependencies to execbuf */ +static VkResult +anv_execbuf_add_bo_bitset(struct anv_device *device, + struct anv_execbuf *exec, + uint32_t dep_words, + BITSET_WORD *deps, + uint32_t extra_flags) +{ + for (uint32_t w = 0; w < dep_words; w++) { + BITSET_WORD mask = deps[w]; + while (mask) { + int i = u_bit_scan(&mask); + uint32_t gem_handle = w * BITSET_WORDBITS + i; + struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle); + assert(bo->refcount > 0); + VkResult result = + anv_execbuf_add_bo(device, exec, bo, NULL, extra_flags); + if (result != VK_SUCCESS) + return result; + } + } + + return VK_SUCCESS; +} + +static VkResult +anv_execbuf_add_syncobj(struct anv_device *device, + struct anv_execbuf *exec, + uint32_t syncobj, + uint32_t flags, + uint64_t timeline_value) +{ + if (exec->syncobj_count >= exec->syncobj_array_length) { + uint32_t new_len = MAX2(exec->syncobj_array_length * 2, 16); + + struct drm_i915_gem_exec_fence *new_syncobjs = + vk_realloc(exec->alloc, exec->syncobjs, + new_len * sizeof(*new_syncobjs), 8, exec->alloc_scope); + if (new_syncobjs == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + exec->syncobjs = new_syncobjs; + + if (exec->syncobj_values) { + uint64_t *new_syncobj_values = + vk_realloc(exec->alloc, exec->syncobj_values, + new_len * sizeof(*new_syncobj_values), 8, + exec->alloc_scope); + if (new_syncobj_values == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + exec->syncobj_values = new_syncobj_values; + } + + exec->syncobj_array_length = new_len; + } + + if (timeline_value && !exec->syncobj_values) { + exec->syncobj_values = + vk_zalloc(exec->alloc, exec->syncobj_array_length * + sizeof(*exec->syncobj_values), + 8, exec->alloc_scope); + if (!exec->syncobj_values) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + exec->syncobjs[exec->syncobj_count] = (struct drm_i915_gem_exec_fence) { + .handle = syncobj, + .flags = flags, + }; + if (exec->syncobj_values) + exec->syncobj_values[exec->syncobj_count] = timeline_value; + + exec->syncobj_count++; + + return VK_SUCCESS; +} + +static VkResult +anv_execbuf_add_sync(struct anv_device *device, + struct anv_execbuf *execbuf, + struct vk_sync *sync, + bool is_signal, + uint64_t value) +{ + /* It's illegal to signal a timeline with value 0 because that's 
never + * higher than the current value. A timeline wait on value 0 is always + * trivial because 0 <= uint64_t always. + */ + if ((sync->flags & VK_SYNC_IS_TIMELINE) && value == 0) + return VK_SUCCESS; + + if (vk_sync_is_anv_bo_sync(sync)) { + struct anv_bo_sync *bo_sync = + container_of(sync, struct anv_bo_sync, sync); + + assert(is_signal == (bo_sync->state == ANV_BO_SYNC_STATE_RESET)); + + return anv_execbuf_add_bo(device, execbuf, bo_sync->bo, NULL, + is_signal ? EXEC_OBJECT_WRITE : 0); + } else if (vk_sync_type_is_drm_syncobj(sync->type)) { + struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync); + + if (!(sync->flags & VK_SYNC_IS_TIMELINE)) + value = 0; + + return anv_execbuf_add_syncobj(device, execbuf, syncobj->syncobj, + is_signal ? I915_EXEC_FENCE_SIGNAL : + I915_EXEC_FENCE_WAIT, + value); + } + + unreachable("Invalid sync type"); +} + +static VkResult +setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf, + struct anv_cmd_buffer *cmd_buffer) +{ + VkResult result; + /* Add surface dependencies (BOs) to the execbuf */ + result = anv_execbuf_add_bo_bitset(cmd_buffer->device, execbuf, + cmd_buffer->surface_relocs.dep_words, + cmd_buffer->surface_relocs.deps, 0); + if (result != VK_SUCCESS) + return result; + + /* First, we walk over all of the bos we've seen and add them and their + * relocations to the validate list. + */ + struct anv_batch_bo **bbo; + u_vector_foreach(bbo, &cmd_buffer->seen_bbos) { + result = anv_execbuf_add_bo(cmd_buffer->device, execbuf, + (*bbo)->bo, &(*bbo)->relocs, 0); + if (result != VK_SUCCESS) + return result; + } + + struct anv_bo **bo_entry; + u_vector_foreach(bo_entry, &cmd_buffer->dynamic_bos) { + result = anv_execbuf_add_bo(cmd_buffer->device, execbuf, + *bo_entry, NULL, 0); + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} + +static VkResult +pin_state_pool(struct anv_device *device, + struct anv_execbuf *execbuf, + struct anv_state_pool *pool) +{ + anv_block_pool_foreach_bo(bo, &pool->block_pool) { + VkResult result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0); + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} + +static VkResult +setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf, + struct anv_queue *queue, + struct anv_cmd_buffer **cmd_buffers, + uint32_t num_cmd_buffers) +{ + struct anv_device *device = queue->device; + VkResult result; + + /* Edit the tail of the command buffers to chain them all together if they + * can be. + */ + anv_cmd_buffer_chain_command_buffers(cmd_buffers, num_cmd_buffers); + + for (uint32_t i = 0; i < num_cmd_buffers; i++) { + anv_measure_submit(cmd_buffers[i]); + result = setup_execbuf_for_cmd_buffer(execbuf, cmd_buffers[i]); + if (result != VK_SUCCESS) + return result; + } + + /* Add all the global BOs to the object list for softpin case. 
*/ + result = pin_state_pool(device, execbuf, &device->scratch_surface_state_pool); + if (result != VK_SUCCESS) + return result; + + result = pin_state_pool(device, execbuf, &device->bindless_surface_state_pool); + if (result != VK_SUCCESS) + return result; + + result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool); + if (result != VK_SUCCESS) + return result; + + result = pin_state_pool(device, execbuf, &device->dynamic_state_pool); + if (result != VK_SUCCESS) + return result; + + result = pin_state_pool(device, execbuf, &device->general_state_pool); + if (result != VK_SUCCESS) + return result; + + result = pin_state_pool(device, execbuf, &device->instruction_state_pool); + if (result != VK_SUCCESS) + return result; + + result = pin_state_pool(device, execbuf, &device->binding_table_pool); + if (result != VK_SUCCESS) + return result; + + /* Add the BOs for all user allocated memory objects because we can't + * track after binding updates of VK_EXT_descriptor_indexing. + */ + list_for_each_entry(struct anv_device_memory, mem, + &device->memory_objects, link) { + result = anv_execbuf_add_bo(device, execbuf, mem->bo, NULL, 0); + if (result != VK_SUCCESS) + return result; + } + + /* Add all the private BOs from images because we can't track after binding + * updates of VK_EXT_descriptor_indexing. + */ + list_for_each_entry(struct anv_image, image, + &device->image_private_objects, link) { + struct anv_bo *private_bo = + image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE].address.bo; + result = anv_execbuf_add_bo(device, execbuf, private_bo, NULL, 0); + if (result != VK_SUCCESS) + return result; + } + + struct anv_batch_bo *first_batch_bo = + list_first_entry(&cmd_buffers[0]->batch_bos, struct anv_batch_bo, link); + + /* The kernel requires that the last entry in the validation list be the + * batch buffer to execute. We can simply swap the element + * corresponding to the first batch_bo in the chain with the last + * element in the list. + */ + if (first_batch_bo->bo->exec_obj_index != execbuf->bo_count - 1) { + uint32_t idx = first_batch_bo->bo->exec_obj_index; + uint32_t last_idx = execbuf->bo_count - 1; + + struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx]; + assert(execbuf->bos[idx] == first_batch_bo->bo); + + execbuf->objects[idx] = execbuf->objects[last_idx]; + execbuf->bos[idx] = execbuf->bos[last_idx]; + execbuf->bos[idx]->exec_obj_index = idx; + + execbuf->objects[last_idx] = tmp_obj; + execbuf->bos[last_idx] = first_batch_bo->bo; + first_batch_bo->bo->exec_obj_index = last_idx; + } + +#ifdef SUPPORT_INTEL_INTEGRATED_GPUS + if (device->physical->memory.need_clflush) + anv_cmd_buffer_clflush(cmd_buffers, num_cmd_buffers); +#endif + + execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) execbuf->objects, + .buffer_count = execbuf->bo_count, + .batch_start_offset = 0, + /* We'll fill in batch length later when chaining batches. 
*/ + .batch_len = 0, + .cliprects_ptr = 0, + .num_cliprects = 0, + .DR1 = 0, + .DR4 = 0, + .flags = I915_EXEC_NO_RELOC | + I915_EXEC_HANDLE_LUT | + queue->exec_flags, + .rsvd1 = device->context_id, + .rsvd2 = 0, + }; + + return VK_SUCCESS; +} + +static VkResult +setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue) +{ + struct anv_device *device = queue->device; + VkResult result = anv_execbuf_add_bo(device, execbuf, + device->trivial_batch_bo, + NULL, 0); + if (result != VK_SUCCESS) + return result; + + execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) execbuf->objects, + .buffer_count = execbuf->bo_count, + .batch_start_offset = 0, + .batch_len = 8, /* GFX7_MI_BATCH_BUFFER_END and NOOP */ + .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC, + .rsvd1 = device->context_id, + .rsvd2 = 0, + }; + + return VK_SUCCESS; +} + +static VkResult +setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue, + struct anv_utrace_submit *submit) +{ + struct anv_device *device = queue->device; + + /* Always add the workaround BO as it includes a driver identifier for the + * error_state. + */ + VkResult result = anv_execbuf_add_bo(device, execbuf, + device->workaround_bo, + NULL, 0); + if (result != VK_SUCCESS) + return result; + + result = anv_execbuf_add_bo(device, execbuf, + submit->batch_bo, + &submit->relocs, 0); + if (result != VK_SUCCESS) + return result; + + result = anv_execbuf_add_sync(device, execbuf, submit->sync, + true /* is_signal */, 0 /* value */); + if (result != VK_SUCCESS) + return result; + + if (submit->batch_bo->exec_obj_index != execbuf->bo_count - 1) { + uint32_t idx = submit->batch_bo->exec_obj_index; + uint32_t last_idx = execbuf->bo_count - 1; + + struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx]; + assert(execbuf->bos[idx] == submit->batch_bo); + + execbuf->objects[idx] = execbuf->objects[last_idx]; + execbuf->bos[idx] = execbuf->bos[last_idx]; + execbuf->bos[idx]->exec_obj_index = idx; + + execbuf->objects[last_idx] = tmp_obj; + execbuf->bos[last_idx] = submit->batch_bo; + submit->batch_bo->exec_obj_index = last_idx; + } + +#ifdef SUPPORT_INTEL_INTEGRATED_GPUS + if (device->physical->memory.need_clflush) + intel_flush_range(submit->batch_bo->map, submit->batch_bo->size); +#endif + + execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) execbuf->objects, + .buffer_count = execbuf->bo_count, + .batch_start_offset = 0, + .batch_len = submit->batch.next - submit->batch.start, + .flags = I915_EXEC_NO_RELOC | + I915_EXEC_HANDLE_LUT | + I915_EXEC_FENCE_ARRAY | + queue->exec_flags, + .rsvd1 = device->context_id, + .rsvd2 = 0, + .num_cliprects = execbuf->syncobj_count, + .cliprects_ptr = (uintptr_t)execbuf->syncobjs, + }; + + return VK_SUCCESS; +} + +static int +anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf) +{ + if (execbuf->flags & I915_EXEC_FENCE_OUT) + return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf); + else + return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); +} + +static VkResult +anv_queue_exec_utrace_locked(struct anv_queue *queue, + struct anv_utrace_submit *submit) +{ + assert(submit->batch_bo); + + struct anv_device *device = queue->device; + struct anv_execbuf execbuf = { + .alloc = &device->vk.alloc, + .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, + }; + + VkResult result = setup_utrace_execbuf(&execbuf, queue, submit); + if (result != 
VK_SUCCESS) + goto error; + + int ret = queue->device->info->no_hw ? 0 : + anv_gem_execbuffer(queue->device, &execbuf.execbuf); + if (ret) + result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m"); + + error: + anv_execbuf_finish(&execbuf); + + return result; +} + +static void +anv_i915_debug_submit(const struct anv_execbuf *execbuf) +{ + uint32_t total_size_kb = 0, total_vram_only_size_kb = 0; + for (uint32_t i = 0; i < execbuf->bo_count; i++) { + const struct anv_bo *bo = execbuf->bos[i]; + total_size_kb += bo->size / 1024; + if (bo->vram_only) + total_vram_only_size_kb += bo->size / 1024; + } + + fprintf(stderr, "Batch offset=0x%x len=0x%x on queue 0 (aperture: %.1fMb, %.1fMb VRAM only)\n", + execbuf->execbuf.batch_start_offset, execbuf->execbuf.batch_len, + (float)total_size_kb / 1024.0f, + (float)total_vram_only_size_kb / 1024.0f); + for (uint32_t i = 0; i < execbuf->bo_count; i++) { + const struct anv_bo *bo = execbuf->bos[i]; + uint64_t size = bo->size + bo->_ccs_size; + + fprintf(stderr, " BO: addr=0x%016"PRIx64"-0x%016"PRIx64" size=%7"PRIu64 + "KB handle=%05u capture=%u vram_only=%u name=%s\n", + bo->offset, bo->offset + size - 1, size / 1024, bo->gem_handle, + (bo->flags & EXEC_OBJECT_CAPTURE) != 0, + bo->vram_only, bo->name); + } +} + +VkResult +i915_queue_exec_locked(struct anv_queue *queue, + uint32_t wait_count, + const struct vk_sync_wait *waits, + uint32_t cmd_buffer_count, + struct anv_cmd_buffer **cmd_buffers, + uint32_t signal_count, + const struct vk_sync_signal *signals, + struct anv_query_pool *perf_query_pool, + uint32_t perf_query_pass) +{ + struct anv_device *device = queue->device; + struct anv_utrace_submit *utrace_submit = NULL; + struct anv_execbuf execbuf = { + .alloc = &queue->device->vk.alloc, + .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, + .perf_query_pass = perf_query_pass, + }; + + /* Flush the trace points first, they need to be moved */ + VkResult result = + anv_device_utrace_flush_cmd_buffers(queue, + cmd_buffer_count, + cmd_buffers, + &utrace_submit); + if (result != VK_SUCCESS) + goto error; + + if (utrace_submit && !utrace_submit->batch_bo) { + result = anv_execbuf_add_sync(device, &execbuf, + utrace_submit->sync, + true /* is_signal */, + 0); + if (result != VK_SUCCESS) + goto error; + + /* When The utrace submission doesn't have its own batch buffer*/ + utrace_submit = NULL; + } + + /* Always add the workaround BO as it includes a driver identifier for the + * error_state. 
+ */ + result = + anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0); + if (result != VK_SUCCESS) + goto error; + + for (uint32_t i = 0; i < wait_count; i++) { + result = anv_execbuf_add_sync(device, &execbuf, + waits[i].sync, + false /* is_signal */, + waits[i].wait_value); + if (result != VK_SUCCESS) + goto error; + } + + for (uint32_t i = 0; i < signal_count; i++) { + result = anv_execbuf_add_sync(device, &execbuf, + signals[i].sync, + true /* is_signal */, + signals[i].signal_value); + if (result != VK_SUCCESS) + goto error; + } + + if (queue->sync) { + result = anv_execbuf_add_sync(device, &execbuf, + queue->sync, + true /* is_signal */, + 0 /* signal_value */); + if (result != VK_SUCCESS) + goto error; + } + + if (cmd_buffer_count) { + result = setup_execbuf_for_cmd_buffers(&execbuf, queue, + cmd_buffers, + cmd_buffer_count); + } else { + result = setup_empty_execbuf(&execbuf, queue); + } + + if (result != VK_SUCCESS) + goto error; + + const bool has_perf_query = + perf_query_pool && perf_query_pass >= 0 && cmd_buffer_count; + + if (INTEL_DEBUG(DEBUG_SUBMIT)) + anv_i915_debug_submit(&execbuf); + + anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers, + perf_query_pool, perf_query_pass); + + if (execbuf.syncobj_values) { + execbuf.timeline_fences.fence_count = execbuf.syncobj_count; + execbuf.timeline_fences.handles_ptr = (uintptr_t)execbuf.syncobjs; + execbuf.timeline_fences.values_ptr = (uintptr_t)execbuf.syncobj_values; + anv_execbuf_add_ext(&execbuf, + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES, + &execbuf.timeline_fences.base); + } else if (execbuf.syncobjs) { + execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY; + execbuf.execbuf.num_cliprects = execbuf.syncobj_count; + execbuf.execbuf.cliprects_ptr = (uintptr_t)execbuf.syncobjs; + } + + if (has_perf_query) { + assert(perf_query_pass < perf_query_pool->n_passes); + struct intel_perf_query_info *query_info = + perf_query_pool->pass_query[perf_query_pass]; + + /* Some performance queries just the pipeline statistic HW, no need for + * OA in that case, so no need to reconfigure. + */ + if (!INTEL_DEBUG(DEBUG_NO_OACONFIG) && + (query_info->kind == INTEL_PERF_QUERY_TYPE_OA || + query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) { + int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG, + (void *)(uintptr_t) query_info->oa_metrics_set_id); + if (ret < 0) { + result = vk_device_set_lost(&device->vk, + "i915-perf config failed: %s", + strerror(errno)); + } + } + + struct anv_bo *pass_batch_bo = perf_query_pool->bo; + + struct drm_i915_gem_exec_object2 query_pass_object = { + .handle = pass_batch_bo->gem_handle, + .offset = pass_batch_bo->offset, + .flags = pass_batch_bo->flags, + }; + struct drm_i915_gem_execbuffer2 query_pass_execbuf = { + .buffers_ptr = (uintptr_t) &query_pass_object, + .buffer_count = 1, + .batch_start_offset = khr_perf_query_preamble_offset(perf_query_pool, + perf_query_pass), + .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags, + .rsvd1 = device->context_id, + }; + + int ret = queue->device->info->no_hw ? 0 : + anv_gem_execbuffer(queue->device, &query_pass_execbuf); + if (ret) + result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m"); + } + + int ret = queue->device->info->no_hw ? 
0 : + anv_gem_execbuffer(queue->device, &execbuf.execbuf); + if (ret) { + anv_i915_debug_submit(&execbuf); + result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m"); + } + + if (result == VK_SUCCESS && queue->sync) { + result = vk_sync_wait(&device->vk, queue->sync, 0, + VK_SYNC_WAIT_COMPLETE, UINT64_MAX); + if (result != VK_SUCCESS) + result = vk_queue_set_lost(&queue->vk, "sync wait failed"); + } + + error: + anv_execbuf_finish(&execbuf); + + if (result == VK_SUCCESS && utrace_submit) + result = anv_queue_exec_utrace_locked(queue, utrace_submit); + + return result; +} + +VkResult +i915_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo, + uint32_t batch_bo_size) +{ + struct anv_device *device = queue->device; + struct anv_execbuf execbuf = { + .alloc = &queue->device->vk.alloc, + .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, + }; + + VkResult result = anv_execbuf_add_bo(device, &execbuf, batch_bo, NULL, 0); + if (result != VK_SUCCESS) + goto fail; + + execbuf.execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) execbuf.objects, + .buffer_count = execbuf.bo_count, + .batch_start_offset = 0, + .batch_len = batch_bo_size, + .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC, + .rsvd1 = device->context_id, + .rsvd2 = 0, + }; + + if (anv_gem_execbuffer(device, &execbuf.execbuf)) { + result = vk_device_set_lost(&device->vk, "anv_gem_execbuffer failed: %m"); + goto fail; + } + + result = anv_device_wait(device, batch_bo, INT64_MAX); + if (result != VK_SUCCESS) + result = vk_device_set_lost(&device->vk, + "anv_device_wait failed: %m"); + +fail: + anv_execbuf_finish(&execbuf); + return result; +} + +VkResult +i915_queue_exec_trace(struct anv_queue *queue, + struct anv_utrace_submit *submit) +{ + assert(submit->batch_bo); + + return anv_queue_exec_utrace_locked(queue, submit); +} diff --git a/lib/mesa/src/intel/vulkan/i915/anv_batch_chain.h b/lib/mesa/src/intel/vulkan/i915/anv_batch_chain.h new file mode 100644 index 000000000..5e3f14fd0 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/i915/anv_batch_chain.h @@ -0,0 +1,53 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include <stdint.h> + +#include "vulkan/vulkan_core.h" + +#include "vk_sync.h" + +struct anv_queue; +struct anv_bo; +struct anv_cmd_buffer; +struct anv_query_pool; +struct anv_utrace_submit; + +VkResult +i915_queue_exec_trace(struct anv_queue *queue, + struct anv_utrace_submit *submit); +VkResult +i915_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo, + uint32_t batch_bo_size); +VkResult +i915_queue_exec_locked(struct anv_queue *queue, + uint32_t wait_count, + const struct vk_sync_wait *waits, + uint32_t cmd_buffer_count, + struct anv_cmd_buffer **cmd_buffers, + uint32_t signal_count, + const struct vk_sync_signal *signals, + struct anv_query_pool *perf_query_pool, + uint32_t perf_query_pass); diff --git a/lib/mesa/src/intel/vulkan/i915/anv_device.c b/lib/mesa/src/intel/vulkan/i915/anv_device.c new file mode 100644 index 000000000..ada5a85e8 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/i915/anv_device.c @@ -0,0 +1,244 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "i915/anv_device.h" +#include "anv_private.h" + +#include "common/intel_defines.h" + +#include "drm-uapi/i915_drm.h" + +static int +vk_priority_to_i915(VkQueueGlobalPriorityKHR priority) +{ + switch (priority) { + case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR: + return INTEL_CONTEXT_LOW_PRIORITY; + case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR: + return INTEL_CONTEXT_MEDIUM_PRIORITY; + case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR: + return INTEL_CONTEXT_HIGH_PRIORITY; + case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR: + return INTEL_CONTEXT_REALTIME_PRIORITY; + default: + unreachable("Invalid priority"); + } +} + +static int +anv_gem_set_context_param(int fd, uint32_t context, uint32_t param, uint64_t value) +{ + if (param == I915_CONTEXT_PARAM_PRIORITY) + value = vk_priority_to_i915(value); + + int err = 0; + if (!intel_gem_set_context_param(fd, context, param, value)) + err = -errno; + return err; +} + +static bool +anv_gem_has_context_priority(int fd, VkQueueGlobalPriorityKHR priority) +{ + return !anv_gem_set_context_param(fd, 0, I915_CONTEXT_PARAM_PRIORITY, + priority); +} + +VkResult +anv_i915_physical_device_get_parameters(struct anv_physical_device *device) +{ + VkResult result = VK_SUCCESS; + int val, fd = device->local_fd; + + if (!intel_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT, &val) || !val) { + result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, + "kernel missing gem wait"); + return result; + } + + if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2, &val) || !val) { + result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, + "kernel missing execbuf2"); + return result; + } + + if (!device->info.has_llc && + (!intel_gem_get_param(fd, I915_PARAM_MMAP_VERSION, &val) || val < 1)) { + result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, + "kernel missing wc mmap"); + return result; + } + + if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &val) || !val) { + result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, + "kernel missing softpin"); + return result; + } + + if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY, &val) || !val) { + result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, + "kernel missing syncobj support"); + return result; + } + + if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC, &val)) + device->has_exec_async = val; + if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE, &val)) + device->has_exec_capture = val; + + /* Start with medium; sorted low to high */ + const VkQueueGlobalPriorityKHR priorities[] = { + VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR, + VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR, + VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR, + VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR, + }; + device->max_context_priority = VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR; + for (unsigned i = 0; i < ARRAY_SIZE(priorities); i++) { + if (!anv_gem_has_context_priority(fd, priorities[i])) + break; + device->max_context_priority = priorities[i]; + } + + if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES, &val)) + device->has_exec_timeline = val; + + return result; +} + +VkResult +anv_i915_device_setup_context(struct anv_device *device, + const VkDeviceCreateInfo *pCreateInfo, + const uint32_t num_queues) +{ + struct anv_physical_device *physical_device = device->physical; + VkResult result = VK_SUCCESS; + + if (device->physical->engine_info) { + /* The kernel API supports at most 64 engines */ + assert(num_queues <= 64); + enum intel_engine_class engine_classes[64]; + int engine_count = 0; + for (uint32_t i = 0; i < 
pCreateInfo->queueCreateInfoCount; i++) { + const VkDeviceQueueCreateInfo *queueCreateInfo = + &pCreateInfo->pQueueCreateInfos[i]; + + assert(queueCreateInfo->queueFamilyIndex < + physical_device->queue.family_count); + struct anv_queue_family *queue_family = + &physical_device->queue.families[queueCreateInfo->queueFamilyIndex]; + + for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) + engine_classes[engine_count++] = queue_family->engine_class; + } + if (!intel_gem_create_context_engines(device->fd, + physical_device->engine_info, + engine_count, engine_classes, + (uint32_t *)&device->context_id)) + result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, + "kernel context creation failed"); + } else { + assert(num_queues == 1); + if (!intel_gem_create_context(device->fd, &device->context_id)) + result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED); + } + + if (result != VK_SUCCESS) + return result; + + /* Here we tell the kernel not to attempt to recover our context but + * immediately (on the next batchbuffer submission) report that the + * context is lost, and we will do the recovery ourselves. In the case + * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting + * the client clean up the pieces. + */ + anv_gem_set_context_param(device->fd, device->context_id, + I915_CONTEXT_PARAM_RECOVERABLE, false); + + /* Check if client specified queue priority. */ + const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority = + vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext, + DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR); + + VkQueueGlobalPriorityKHR priority = + queue_priority ? queue_priority->globalPriority : + VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR; + + /* As per spec, the driver implementation may deny requests to acquire + * a priority above the default priority (MEDIUM) if the caller does not + * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR + * is returned. + */ + if (physical_device->max_context_priority >= VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) { + int err = anv_gem_set_context_param(device->fd, device->context_id, + I915_CONTEXT_PARAM_PRIORITY, + priority); + if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) { + result = vk_error(device, VK_ERROR_NOT_PERMITTED_KHR); + goto fail_context; + } + } + + return result; + +fail_context: + intel_gem_destroy_context(device->fd, device->context_id); + return result; +} + +static int +anv_gem_context_get_reset_stats(int fd, int context, + uint32_t *active, uint32_t *pending) +{ + struct drm_i915_reset_stats stats = { + .ctx_id = context, + }; + + int ret = intel_ioctl(fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats); + if (ret == 0) { + *active = stats.batch_active; + *pending = stats.batch_pending; + } + + return ret; +} + +VkResult +anv_i915_device_check_status(struct vk_device *vk_device) +{ + struct anv_device *device = container_of(vk_device, struct anv_device, vk); + uint32_t active = 0, pending = 0; + int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id, + &active, &pending); + if (ret == -1) { + /* We don't know the real error. 
*/ + return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m"); + } + + if (active) { + return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers"); + } else if (pending) { + return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight"); + } + + return VK_SUCCESS; +} diff --git a/lib/mesa/src/intel/vulkan/i915/anv_device.h b/lib/mesa/src/intel/vulkan/i915/anv_device.h new file mode 100644 index 000000000..af42c2241 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/i915/anv_device.h @@ -0,0 +1,39 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once + +#include "vulkan/vulkan_core.h" +#include "vk_device.h" + +struct anv_device; +struct anv_physical_device; + +VkResult +anv_i915_physical_device_get_parameters(struct anv_physical_device *device); + +VkResult +anv_i915_device_setup_context(struct anv_device *device, + const VkDeviceCreateInfo *pCreateInfo, + const uint32_t num_queues); + +VkResult anv_i915_device_check_status(struct vk_device *vk_device); diff --git a/lib/mesa/src/intel/vulkan/i915/anv_kmd_backend.c b/lib/mesa/src/intel/vulkan/i915/anv_kmd_backend.c new file mode 100644 index 000000000..a3c26dede --- /dev/null +++ b/lib/mesa/src/intel/vulkan/i915/anv_kmd_backend.c @@ -0,0 +1,184 @@ +/* + * Copyright © 2023 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
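A hedged sketch of how the status callback above is intended to be hooked up: the common Vulkan runtime polls a per-device check_status hook and turns any failure into device loss. The wrapper and its placement during device creation are illustrative; the vk_device field name is assumed from the runtime used in this tree.

   /* Install the i915 reset-stats based status check so the runtime can
    * report VK_ERROR_DEVICE_LOST when the kernel flags a hang. */
   static void
   install_i915_status_hook(struct anv_device *device)
   {
      device->vk.check_status = anv_i915_device_check_status;
   }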
+ */ + +#include <sys/mman.h> + +#include "anv_private.h" + +#include "i915/anv_batch_chain.h" + +#include "drm-uapi/i915_drm.h" + +static uint32_t +i915_gem_create(struct anv_device *device, + const struct intel_memory_class_instance **regions, + uint16_t num_regions, uint64_t size, + enum anv_bo_alloc_flags alloc_flags, + uint64_t *actual_size) +{ + if (unlikely(!device->info->mem.use_class_instance)) { + assert(num_regions == 1 && + device->physical->sys.region == regions[0]); + + struct drm_i915_gem_create gem_create = { + .size = size, + }; + if (intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) + return 0; + + *actual_size = gem_create.size; + return gem_create.handle; + } + + struct drm_i915_gem_memory_class_instance i915_regions[2]; + assert(num_regions <= ARRAY_SIZE(i915_regions)); + + for (uint16_t i = 0; i < num_regions; i++) { + i915_regions[i].memory_class = regions[i]->klass; + i915_regions[i].memory_instance = regions[i]->instance; + } + + uint32_t flags = 0; + if (alloc_flags & (ANV_BO_ALLOC_MAPPED | ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE) && + !(alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM)) + if (device->physical->vram_non_mappable.size > 0) + flags |= I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS; + + struct drm_i915_gem_create_ext_memory_regions ext_regions = { + .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS }, + .num_regions = num_regions, + .regions = (uintptr_t)i915_regions, + }; + struct drm_i915_gem_create_ext gem_create = { + .size = size, + .extensions = (uintptr_t) &ext_regions, + .flags = flags, + }; + + if (intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &gem_create)) + return 0; + + *actual_size = gem_create.size; + return gem_create.handle; +} + +static void +i915_gem_close(struct anv_device *device, uint32_t handle) +{ + struct drm_gem_close close = { + .handle = handle, + }; + + intel_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +static void * +i915_gem_mmap_offset(struct anv_device *device, struct anv_bo *bo, + uint64_t size, uint32_t flags) +{ + struct drm_i915_gem_mmap_offset gem_mmap = { + .handle = bo->gem_handle, + .flags = flags, + }; + if (intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &gem_mmap)) + return MAP_FAILED; + + return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + device->fd, gem_mmap.offset); +} + +static void * +i915_gem_mmap_legacy(struct anv_device *device, struct anv_bo *bo, uint64_t offset, + uint64_t size, uint32_t flags) +{ + struct drm_i915_gem_mmap gem_mmap = { + .handle = bo->gem_handle, + .offset = offset, + .size = size, + .flags = flags, + }; + if (intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap)) + return MAP_FAILED; + + return (void *)(uintptr_t) gem_mmap.addr_ptr; +} + +static uint32_t +mmap_calc_flags(struct anv_device *device, struct anv_bo *bo, + VkMemoryPropertyFlags property_flags) +{ + if (device->info->has_local_mem) + return I915_MMAP_OFFSET_FIXED; + + uint32_t flags = 0; + if (!device->info->has_llc && + (property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) + flags |= I915_MMAP_WC; + if (bo->map_wc) + flags |= I915_MMAP_WC; + if (!(property_flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT)) + flags |= I915_MMAP_WC; + + if (likely(device->physical->info.has_mmap_offset)) + flags = (flags & I915_MMAP_WC) ? 
I915_MMAP_OFFSET_WC : I915_MMAP_OFFSET_WB; + return flags; +} + +static void * +i915_gem_mmap(struct anv_device *device, struct anv_bo *bo, uint64_t offset, + uint64_t size, VkMemoryPropertyFlags property_flags) +{ + const uint32_t flags = mmap_calc_flags(device, bo, property_flags); + + if (likely(device->physical->info.has_mmap_offset)) + return i915_gem_mmap_offset(device, bo, size, flags); + return i915_gem_mmap_legacy(device, bo, offset, size, flags); +} + +static int +i915_gem_vm_bind(struct anv_device *device, struct anv_bo *bo) +{ + return 0; +} + +static int +i915_gem_vm_unbind(struct anv_device *device, struct anv_bo *bo) +{ + return 0; +} + +const struct anv_kmd_backend * +anv_i915_kmd_backend_get(void) +{ + static const struct anv_kmd_backend i915_backend = { + .gem_create = i915_gem_create, + .gem_close = i915_gem_close, + .gem_mmap = i915_gem_mmap, + .gem_vm_bind = i915_gem_vm_bind, + .gem_vm_unbind = i915_gem_vm_unbind, + .execute_simple_batch = i915_execute_simple_batch, + .queue_exec_locked = i915_queue_exec_locked, + .queue_exec_trace = i915_queue_exec_trace, + }; + return &i915_backend; +} diff --git a/lib/mesa/src/intel/vulkan/layers/anv_android_layer.c b/lib/mesa/src/intel/vulkan/layers/anv_android_layer.c new file mode 100644 index 000000000..e36eb820a --- /dev/null +++ b/lib/mesa/src/intel/vulkan/layers/anv_android_layer.c @@ -0,0 +1,46 @@ +/* + * Copyright © 2023 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +VKAPI_ATTR VkResult VKAPI_CALL +android_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + const struct util_format_description *fmt = + vk_format_description(pCreateInfo->format); + + /* Throw error in case application tries to create ASTC view on gfx125. + * This is done to avoid gpu hang that can result in using the unsupported + * format. 
+ */ + if (fmt && fmt->layout == UTIL_FORMAT_LAYOUT_ASTC && + device->info->verx10 >= 125) { + return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY, + "ASTC format not supported (%s).", __func__); + } + return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView); +} diff --git a/lib/mesa/src/intel/vulkan/layers/anv_doom64.c b/lib/mesa/src/intel/vulkan/layers/anv_doom64.c new file mode 100644 index 000000000..80ca74f97 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/layers/anv_doom64.c @@ -0,0 +1,134 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/set.h" +#include "anv_private.h" +#include "vk_common_entrypoints.h" + +/** + * The DOOM 64 rendering corruption is happening because the game always uses + * ``` + * vkCmdPipelineBarrier(VK_IMAGE_LAYOUT_UNDEFINED -> + * VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) + * vkCmdCopyBufferToImage(...) + * vkCmdPipelineBarrier(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL -> + * VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) + * ``` + * when it wants to update its texture atlas image. + * + * According to spec, transitioning from VK_IMAGE_LAYOUT_UNDEFINED means + * that the current image content might be discarded, but the game relies + * on it being fully preserved. + * + * This work-around layer implements super-barebone layout tracking: allows + * the first transition from VK_IMAGE_LAYOUT_UNDEFINED, but replaces + * oldLayout with VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL for each + * subsequent transition of that image. + * + * Gen12+ does not ambiguate CCS data on transition from VK_IMAGE_LAYOUT_UNDEFINED + * so it preserves all compressed information, and this WA is not needed. 
+ */ + +VKAPI_ATTR void VKAPI_CALL +doom64_CmdPipelineBarrier(VkCommandBuffer commandBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask, + VkDependencyFlags dependencyFlags, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, command_buffer, commandBuffer); + assert(command_buffer && command_buffer->device); + + VkImageMemoryBarrier fixed_barrier; + struct set * defined_images = + command_buffer->device->workarounds.doom64_images; + + if (defined_images && + imageMemoryBarrierCount == 1 && pImageMemoryBarriers && + pImageMemoryBarriers[0].oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && + pImageMemoryBarriers[0].newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + ANV_FROM_HANDLE(anv_image, image, pImageMemoryBarriers[0].image); + + if (!_mesa_set_search(defined_images, image)) { + _mesa_set_add(defined_images, image); + } else { + memcpy(&fixed_barrier, pImageMemoryBarriers, sizeof(VkImageMemoryBarrier)); + + fixed_barrier.oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + pImageMemoryBarriers = (const VkImageMemoryBarrier*) &fixed_barrier; + } + } + + vk_common_CmdPipelineBarrier(commandBuffer, srcStageMask, dstStageMask, + dependencyFlags, memoryBarrierCount, + pMemoryBarriers, bufferMemoryBarrierCount, + pBufferMemoryBarriers, + imageMemoryBarrierCount, + pImageMemoryBarriers); +} + +VKAPI_ATTR VkResult VKAPI_CALL +doom64_CreateImage(VkDevice _device, const VkImageCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkImage* pImage) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + assert(device); + + if (!device->workarounds.doom64_images) { + device->workarounds.doom64_images = _mesa_pointer_set_create(NULL); + + if (!device->workarounds.doom64_images) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + + return anv_CreateImage(_device, pCreateInfo, pAllocator, pImage); +} + +VKAPI_ATTR void VKAPI_CALL +doom64_DestroyImage(VkDevice _device, VkImage _image, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_image, image, _image); + assert(device); + + struct set * defined_images = device->workarounds.doom64_images; + + if (image && defined_images) { + _mesa_set_remove_key(defined_images, image); + + if (!defined_images->entries) { + _mesa_set_destroy(defined_images, NULL); + device->workarounds.doom64_images = NULL; + } + } + + anv_DestroyImage(_device, _image, pAllocator); +} diff --git a/lib/mesa/src/intel/vulkan/meson.build b/lib/mesa/src/intel/vulkan/meson.build index 9e54716df..be8a37e84 100644 --- a/lib/mesa/src/intel/vulkan/meson.build +++ b/lib/mesa/src/intel/vulkan/meson.build @@ -18,11 +18,13 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
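Returning to the DOOM 64 layer above, a hedged restatement of its tracking rule as a standalone helper: the first UNDEFINED -> TRANSFER_DST_OPTIMAL transition of an image is allowed through, every later one is rewritten so the already-uploaded texture-atlas contents are preserved. The helper name is illustrative; the set functions are the ones the layer itself uses.

   static VkImageLayout
   doom64_effective_old_layout(struct set *defined_images,
                               const struct anv_image *image)
   {
      if (_mesa_set_search(defined_images, image))
         return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; /* seen before */
      _mesa_set_add(defined_images, image);                /* first transition */
      return VK_IMAGE_LAYOUT_UNDEFINED;
   }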
+subdir('shaders') + inc_anv = include_directories('.') anv_flags = [ no_override_init_args, - c_sse2_args, + sse2_args, ] anv_cpp_flags = [] @@ -38,24 +40,13 @@ anv_entrypoints = custom_target( '--device-prefix', 'gfx11', '--device-prefix', 'gfx12', '--device-prefix', 'gfx125', - '--device-prefix', 'hitman3' + '--device-prefix', 'doom64', + '--device-prefix', 'hitman3', + '--device-prefix', 'android' ], depend_files : vk_entrypoints_gen_depend_files, ) -float64_spv_h = custom_target( - 'float64_spv.h', - input : [glsl2spirv, float64_glsl_file], - output : 'float64_spv.h', - command : [ - prog_python, '@INPUT@', '@OUTPUT@', - '--create-entry', 'main', - '--vn', 'float64_spv_source', - '--glsl-version', '450', - '-Olib', - ] -) - idep_anv_headers = declare_dependency( sources : [anv_entrypoints[0]], include_directories : inc_anv, @@ -87,23 +78,21 @@ intel_icd = custom_target( install : true, ) -if meson.version().version_compare('>= 0.58') - _dev_icdname = 'intel_devenv_icd.@0@.json'.format(host_machine.cpu()) - custom_target( - 'intel_devenv_icd', - input : [vk_icd_gen, vk_api_xml], - output : _dev_icdname, - command : [ - prog_python, '@INPUT0@', - '--api-version', '1.3', '--xml', '@INPUT1@', - '--lib-path', meson.current_build_dir() / 'libvulkan_intel.so', - '--out', '@OUTPUT@', - ], - build_by_default : true, - ) +_dev_icdname = 'intel_devenv_icd.@0@.json'.format(host_machine.cpu()) +_dev_icd = custom_target( + 'intel_devenv_icd', + input : [vk_icd_gen, vk_api_xml], + output : _dev_icdname, + command : [ + prog_python, '@INPUT0@', + '--api-version', '1.3', '--xml', '@INPUT1@', + '--lib-path', meson.current_build_dir() / 'libvulkan_intel.so', + '--out', '@OUTPUT@', + ], + build_by_default : true, +) - devenv.append('VK_ICD_FILENAMES', meson.current_build_dir() / _dev_icdname) -endif +devenv.append('VK_ICD_FILENAMES', _dev_icd.full_path()) libanv_per_hw_ver_libs = [] anv_per_hw_ver_files = files( @@ -113,6 +102,7 @@ anv_per_hw_ver_files = files( 'genX_pipeline.c', 'genX_query.c', 'genX_state.c', + 'genX_video.c', ) if with_intel_vk_rt anv_per_hw_ver_files += files('genX_acceleration_structure.c',) @@ -125,7 +115,7 @@ foreach g : [['90', ['gfx8_cmd_buffer.c']], _gfx_ver = g[0] libanv_per_hw_ver_libs += static_library( 'anv_per_hw_ver@0@'.format(_gfx_ver), - [anv_per_hw_ver_files, g[1], anv_entrypoints[0]], + [anv_per_hw_ver_files, g[1], anv_entrypoints[0], generated_draws_spvs, ], include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel, ], @@ -141,7 +131,21 @@ foreach g : [['90', ['gfx8_cmd_buffer.c']], endforeach libanv_files = files( + 'i915/anv_batch_chain.c', + 'i915/anv_batch_chain.h', + 'i915/anv_device.c', + 'i915/anv_device.h', + 'i915/anv_kmd_backend.c', + 'layers/anv_doom64.c', 'layers/anv_hitman3.c', + 'layers/anv_android_layer.c', + 'xe/anv_batch_chain.c', + 'xe/anv_batch_chain.h', + 'xe/anv_kmd_backend.c', + 'xe/anv_device.c', + 'xe/anv_device.h', + 'xe/anv_queue.c', + 'xe/anv_queue.h', 'anv_allocator.c', 'anv_android.h', 'anv_batch_chain.c', @@ -151,17 +155,19 @@ libanv_files = files( 'anv_descriptor_set.c', 'anv_device.c', 'anv_formats.c', + 'anv_generated_indirect_draws.c', 'anv_genX.h', 'anv_image.c', + 'anv_kmd_backend.c', + 'anv_kmd_backend.h', 'anv_measure.c', 'anv_measure.h', + 'anv_mesh_perprim_wa.c', 'anv_nir.h', - 'anv_nir_add_base_work_group_id.c', 'anv_nir_apply_pipeline_layout.c', 'anv_nir_compute_push_layout.c', 'anv_nir_lower_multiview.c', 'anv_nir_lower_ubo_loads.c', - 'anv_nir_lower_ycbcr_textures.c', 
'anv_nir_push_descriptor_analysis.c', 'anv_perf.c', 'anv_pipeline.c', @@ -170,6 +176,7 @@ libanv_files = files( 'anv_queue.c', 'anv_util.c', 'anv_utrace.c', + 'anv_video.c', 'anv_wsi.c', ) @@ -208,6 +215,7 @@ libanv_common = static_library( [ libanv_files, anv_entrypoints, sha1_h, gen_xml_pack, float64_spv_h, + generated_draws_spvs, ], include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler, @@ -216,7 +224,7 @@ libanv_common = static_library( c_args : anv_flags, cpp_args : anv_cpp_flags, gnu_symbol_visibility : 'hidden', - dependencies : anv_deps, + dependencies : anv_deps ) libvulkan_intel = shared_library( @@ -227,17 +235,18 @@ libvulkan_intel = shared_library( ], link_whole : [libanv_common, libanv_per_hw_ver_libs] + optional_libgrl, link_with : [ - libintel_compiler, libintel_dev, libisl, libblorp, libintel_perf, + libintel_compiler, libisl, libblorp, libintel_perf, ], dependencies : [ dep_thread, dep_dl, dep_m, anv_deps, idep_libintel_common, idep_nir, idep_genxml, idep_vulkan_util, idep_vulkan_wsi, idep_vulkan_runtime, idep_mesautil, idep_xmlconfig, - idep_intel_driver_ds, + idep_intel_driver_ds, idep_intel_dev, ], c_args : anv_flags, gnu_symbol_visibility : 'hidden', - link_args : [ld_args_build_id, ld_args_bsymbolic, ld_args_gc_sections], + link_args : [vulkan_icd_link_args, ld_args_build_id, ld_args_bsymbolic, ld_args_gc_sections], + link_depends : vulkan_icd_link_depends, install : true, ) @@ -263,13 +272,13 @@ if with_tests ], link_whole : libanv_common, link_with : [ - libanv_per_hw_ver_libs, libintel_compiler, libintel_common, libintel_dev, + libanv_per_hw_ver_libs, libintel_compiler, libintel_common, libisl, libblorp, libintel_perf, ] + optional_libgrl, dependencies : [ dep_thread, dep_dl, dep_m, anv_deps, idep_nir, idep_vulkan_util, idep_vulkan_wsi, idep_vulkan_runtime, - idep_mesautil, + idep_mesautil, idep_intel_dev, ], c_args : anv_flags, gnu_symbol_visibility : 'hidden', @@ -283,12 +292,12 @@ if with_tests executable( t, ['tests/@0@.c'.format(t), anv_entrypoints[0]], - c_args : [ c_sse2_args ], + c_args : [ sse2_args ], link_with : libvulkan_intel_test, dependencies : [ dep_libdrm, dep_thread, dep_m, dep_valgrind, idep_vulkan_util, idep_vulkan_wsi_headers, - idep_vulkan_runtime, idep_intel_driver_ds, + idep_vulkan_runtime, idep_intel_driver_ds, idep_intel_dev, ], include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler, diff --git a/lib/mesa/src/intel/vulkan/shaders/common_generated_draws.glsl b/lib/mesa/src/intel/vulkan/shaders/common_generated_draws.glsl new file mode 100644 index 000000000..06ea7781c --- /dev/null +++ b/lib/mesa/src/intel/vulkan/shaders/common_generated_draws.glsl @@ -0,0 +1,133 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define BITFIELD_BIT(i) (1u << i) + +#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0) +#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1) +#define ANV_GENERATED_FLAG_DRAWID BITFIELD_BIT(2) +#define ANV_GENERATED_FLAG_BASE BITFIELD_BIT(3) + +/* These 3 bindings will be accessed through A64 messages */ +layout(set = 0, binding = 0, std430) buffer Storage0 { + uint indirect_data[]; +}; + +layout(set = 0, binding = 1, std430) buffer Storage1 { + uint commands[]; +}; + +layout(set = 0, binding = 2, std430) buffer Storage2 { + uint draw_ids[]; +}; + +/* This data will be provided through push constants. */ +layout(set = 0, binding = 3) uniform block { + uint64_t draw_id_addr; + uint64_t indirect_data_addr; + uint indirect_data_stride; + uint flags; + uint draw_base; + uint draw_count; + uint max_draw_count; + uint instance_multiplier; + uint64_t end_addr; +}; + +void write_VERTEX_BUFFER_STATE(uint write_offset, + uint mocs, + uint buffer_idx, + uint64_t address, + uint size) +{ + commands[write_offset + 0] = (0 << 0 | /* Buffer Pitch */ + 0 << 13 | /* Null Vertex Buffer */ + 1 << 14 | /* Address Modify Enable */ + mocs << 16 | /* MOCS */ + buffer_idx << 26); /* Vertex Buffer Index */ + commands[write_offset + 1] = uint(address & 0xffffffff); + commands[write_offset + 2] = uint(address >> 32); + commands[write_offset + 3] = size; +} + +void write_3DPRIMITIVE(uint write_offset, + bool is_predicated, + bool is_indexed, + uint vertex_count_per_instance, + uint start_vertex_location, + uint instance_count, + uint start_instance_location, + uint base_vertex_location) +{ + commands[write_offset + 0] = (3 << 29 | /* Command Type */ + 3 << 27 | /* Command SubType */ + 3 << 24 | /* 3D Command Opcode */ + uint(is_predicated) << 8 | + 5 << 0); /* DWord Length */ + commands[write_offset + 1] = uint(is_indexed) << 8; + commands[write_offset + 2] = vertex_count_per_instance; + commands[write_offset + 3] = start_vertex_location; + commands[write_offset + 4] = instance_count; + commands[write_offset + 5] = start_instance_location; + commands[write_offset + 6] = base_vertex_location; +} + +void write_3DPRIMITIVE_EXTENDED(uint write_offset, + bool is_predicated, + bool is_indexed, + uint vertex_count_per_instance, + uint start_vertex_location, + uint instance_count, + uint start_instance_location, + uint base_vertex_location, + uint param_base_vertex, + uint param_base_instance, + uint param_draw_id) +{ + commands[write_offset + 0] = (3 << 29 | /* Command Type */ + 3 << 27 | /* Command SubType */ + 3 << 24 | /* 3D Command Opcode */ + 1 << 11 | /* Extended Parameter Enable */ + uint(is_predicated) << 8 | + 8 << 0); /* DWord Length */ + commands[write_offset + 1] = uint(is_indexed) << 8; + commands[write_offset + 2] = vertex_count_per_instance; + commands[write_offset + 3] = start_vertex_location; + commands[write_offset + 4] = instance_count; + commands[write_offset + 5] = start_instance_location; + commands[write_offset + 6] = base_vertex_location; + commands[write_offset + 7] = param_base_vertex; + 
commands[write_offset + 8] = param_base_instance; + commands[write_offset + 9] = param_draw_id; +} + +void write_MI_BATCH_BUFFER_START(uint write_offset, + uint64_t addr) +{ + commands[write_offset + 0] = (0 << 29 | /* Command Type */ + 49 << 23 | /* MI Command Opcode */ + 1 << 8 | /* Address Space Indicator (PPGTT) */ + 1 << 0); /* DWord Length */ + commands[write_offset + 1] = uint(addr & 0xffffffff); + commands[write_offset + 2] = uint(addr >> 32); +} diff --git a/lib/mesa/src/intel/vulkan/shaders/gfx11_generated_draws.glsl b/lib/mesa/src/intel/vulkan/shaders/gfx11_generated_draws.glsl new file mode 100644 index 000000000..8745f7bab --- /dev/null +++ b/lib/mesa/src/intel/vulkan/shaders/gfx11_generated_draws.glsl @@ -0,0 +1,85 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
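For reference, the same MI_BATCH_BUFFER_START encoding the GLSL helper above emits, written as a hedged C cross-check (command type 0, MI opcode 49, PPGTT address-space indicator, DWord Length 1 for a 3-dword packet); the function name is illustrative.

   static void
   encode_mi_batch_buffer_start(uint32_t *out, uint64_t addr)
   {
      out[0] = (0u << 29) | (49u << 23) | (1u << 8) | (1u << 0);
      out[1] = (uint32_t)(addr & 0xffffffffu);   /* low 32 bits of target */
      out[2] = (uint32_t)(addr >> 32);           /* high 32 bits of target */
   }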
+ */ + +#version 450 +#extension GL_ARB_gpu_shader_int64 : enable +#extension GL_GOOGLE_include_directive : enable + +#include "common_generated_draws.glsl" + +void main() +{ + bool is_indexed = (flags & ANV_GENERATED_FLAG_INDEXED) != 0; + bool is_predicated = (flags & ANV_GENERATED_FLAG_PREDICATED) != 0; + uint _3dprim_dw_size = (flags >> 16) & 0xff; + uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x); + uint indirect_data_offset = item_idx * indirect_data_stride / 4; + uint cmd_idx = item_idx * _3dprim_dw_size; + uint draw_id = draw_base + item_idx; + + if (draw_id < draw_count) { + if (is_indexed) { + /* Loading a VkDrawIndexedIndirectCommand */ + uint index_count = indirect_data[indirect_data_offset + 0]; + uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier; + uint first_index = indirect_data[indirect_data_offset + 2]; + uint vertex_offset = indirect_data[indirect_data_offset + 3]; + uint first_instance = indirect_data[indirect_data_offset + 4]; + + write_3DPRIMITIVE_EXTENDED(cmd_idx, + is_predicated, + is_indexed, + index_count, + first_index, + instance_count, + first_instance, + vertex_offset, + vertex_offset, + first_instance, + draw_id); + } else { + /* Loading a VkDrawIndirectCommand structure */ + uint vertex_count = indirect_data[indirect_data_offset + 0]; + uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier; + uint first_vertex = indirect_data[indirect_data_offset + 2]; + uint first_instance = indirect_data[indirect_data_offset + 3]; + + write_3DPRIMITIVE_EXTENDED(cmd_idx, + is_predicated, + is_indexed, + vertex_count, + first_vertex, + instance_count, + first_instance, + 0 /* base_vertex_location */, + first_vertex, + first_instance, + draw_id); + } + } else if (draw_id == draw_count && draw_id < max_draw_count) { + /* Only write a jump forward in the batch if we have fewer elements than + * the max draw count. + */ + write_MI_BATCH_BUFFER_START(cmd_idx, end_addr); + } +} diff --git a/lib/mesa/src/intel/vulkan/shaders/gfx9_generated_draws.glsl b/lib/mesa/src/intel/vulkan/shaders/gfx9_generated_draws.glsl new file mode 100644 index 000000000..9850b19c3 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/shaders/gfx9_generated_draws.glsl @@ -0,0 +1,144 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
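The gfx11 shader above maps one fragment to one indirect draw: with an 8192-pixel-wide render target, fragment (x, y) owns item y * 8192 + x and writes its packet at a fixed dword stride taken from the flags word. A hedged C restatement of that indexing, with an illustrative function name:

   static uint32_t
   generated_draw_cmd_offset_dw(uint32_t frag_x, uint32_t frag_y,
                                uint32_t dw_per_draw)
   {
      uint32_t item_idx = frag_y * 8192 + frag_x; /* gl_FragCoord mapping */
      return item_idx * dw_per_draw;              /* packet offset in dwords */
   }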
+ */ + +#version 450 +#extension GL_ARB_gpu_shader_int64 : enable +#extension GL_GOOGLE_include_directive : enable + +#include "common_generated_draws.glsl" + +void main() +{ + bool is_indexed = (flags & ANV_GENERATED_FLAG_INDEXED) != 0; + bool is_predicated = (flags & ANV_GENERATED_FLAG_PREDICATED) != 0; + bool uses_base = (flags & ANV_GENERATED_FLAG_BASE) != 0; + bool uses_drawid = (flags & ANV_GENERATED_FLAG_DRAWID) != 0; + uint mocs = (flags >> 8) & 0xff; + uint _3dprim_dw_size = (flags >> 16) & 0xff; + uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x); + uint indirect_data_offset = item_idx * indirect_data_stride / 4; + uint cmd_idx = item_idx * _3dprim_dw_size; + uint draw_id = draw_base + item_idx; + + if (draw_id < draw_count) { + if (is_indexed) { + /* Loading a VkDrawIndexedIndirectCommand */ + uint index_count = indirect_data[indirect_data_offset + 0]; + uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier; + uint first_index = indirect_data[indirect_data_offset + 2]; + uint vertex_offset = indirect_data[indirect_data_offset + 3]; + uint first_instance = indirect_data[indirect_data_offset + 4]; + + if (uses_base || uses_drawid) { + uint state_vertex_len = + 1 + (uses_base ? 4 : 0) + (uses_drawid ? 4 : 0); + commands[cmd_idx] = + (3 << 29 | /* Command Type */ + 3 << 27 | /* Command SubType */ + 0 << 24 | /* 3D Command Opcode */ + 8 << 16 | /* 3D Command Sub Opcode */ + (state_vertex_len - 2) << 0); /* DWord Length */ + cmd_idx += 1; + if (uses_base) { + uint64_t indirect_draw_data_addr = + indirect_data_addr + item_idx * indirect_data_stride + 12; + write_VERTEX_BUFFER_STATE(cmd_idx, + mocs, + 31, + indirect_draw_data_addr, + 8); + cmd_idx += 4; + } + if (uses_drawid) { + uint64_t draw_idx_addr = draw_id_addr + 4 * item_idx; + draw_ids[draw_id] = draw_id; + write_VERTEX_BUFFER_STATE(cmd_idx, + mocs, + 32, + draw_idx_addr, + 4); + cmd_idx += 4; + } + } + write_3DPRIMITIVE(cmd_idx, + is_predicated, + is_indexed, + index_count, + first_index, + instance_count, + first_instance, + vertex_offset); + } else { + /* Loading a VkDrawIndirectCommand structure */ + uint vertex_count = indirect_data[indirect_data_offset + 0]; + uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier; + uint first_vertex = indirect_data[indirect_data_offset + 2]; + uint first_instance = indirect_data[indirect_data_offset + 3]; + + if (uses_base || uses_drawid) { + uint state_vertex_len = + 1 + (uses_base ? 4 : 0) + (uses_drawid ? 4 : 0); + commands[cmd_idx] = + (3 << 29 | /* Command Type */ + 3 << 27 | /* Command SubType */ + 0 << 24 | /* 3D Command Opcode */ + 8 << 16 | /* 3D Command Sub Opcode */ + (state_vertex_len - 2) << 0); /* DWord Length */ + cmd_idx += 1; + if (uses_base) { + uint64_t indirect_draw_data_addr = + indirect_data_addr + item_idx * indirect_data_stride + 8; + write_VERTEX_BUFFER_STATE(cmd_idx, + mocs, + 31, + indirect_draw_data_addr, + 8); + cmd_idx += 4; + } + if (uses_drawid) { + uint64_t draw_idx_addr = draw_id_addr + 4 * item_idx; + draw_ids[draw_id] = draw_id; + write_VERTEX_BUFFER_STATE(cmd_idx, + mocs, + 32, + draw_idx_addr, + 4); + cmd_idx += 4; + } + } + write_3DPRIMITIVE(cmd_idx, + is_predicated, + is_indexed, + vertex_count, + first_vertex, + instance_count, + first_instance, + 0 /* base_vertex_location */); + } + } else if (draw_id == draw_count && draw_id < max_draw_count) { + /* Only write a jump forward in the batch if we have fewer elements than + * the max draw count. 
+ */ + write_MI_BATCH_BUFFER_START(cmd_idx, end_addr); + } +} diff --git a/lib/mesa/src/intel/vulkan/shaders/meson.build b/lib/mesa/src/intel/vulkan/shaders/meson.build new file mode 100644 index 000000000..2f1952ee5 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/shaders/meson.build @@ -0,0 +1,56 @@ +# Copyright © 2022 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +float64_spv_h = custom_target( + 'float64_spv.h', + input : [glsl2spirv, float64_glsl_file], + output : 'float64_spv.h', + command : [ + prog_python, '@INPUT@', '@OUTPUT@', + prog_glslang, + '--create-entry', 'main', + '--vn', 'float64_spv_source', + '--glsl-version', '450', + '-Olib', + ] +) + +generated_draws_shaders = [ + 'gfx9_generated_draws.glsl', + 'gfx11_generated_draws.glsl', +] + +generated_draws_spvs = [] +foreach f : generated_draws_shaders + spv_filename = f.replace('.glsl', '_spv.h') + src_name = f.replace('.glsl', '_spv_source') + generated_draws_spvs += custom_target( + spv_filename, + input : [glsl2spirv, f, files('common_generated_draws.glsl')], + output : spv_filename, + command : [ + prog_python, '@INPUT0@', '@INPUT1@', '@OUTPUT@', + prog_glslang, + '--vn', src_name, + '--glsl-version', '450', + '--stage', 'frag', + '-I' + meson.current_source_dir(), + ]) +endforeach diff --git a/lib/mesa/src/intel/vulkan/tests/block_pool_grow_first.c b/lib/mesa/src/intel/vulkan/tests/block_pool_grow_first.c index 7359b66cb..5ad230392 100644 --- a/lib/mesa/src/intel/vulkan/tests/block_pool_grow_first.c +++ b/lib/mesa/src/intel/vulkan/tests/block_pool_grow_first.c @@ -36,7 +36,9 @@ int main(void) const uint32_t block_size = 16 * 1024; const uint32_t initial_size = block_size / 2; + test_device_info_init(&physical_device.info); anv_device_set_physical(&device, &physical_device); + device.kmd_backend = anv_kmd_backend_get(INTEL_KMD_TYPE_STUB); pthread_mutex_init(&device.mutex, NULL); anv_bo_cache_init(&device.bo_cache, &device); anv_block_pool_init(&pool, &device, "test", 4096, initial_size); diff --git a/lib/mesa/src/intel/vulkan/tests/state_pool_padding.c b/lib/mesa/src/intel/vulkan/tests/state_pool_padding.c index b76ba8ad6..845767a35 100644 --- a/lib/mesa/src/intel/vulkan/tests/state_pool_padding.c +++ b/lib/mesa/src/intel/vulkan/tests/state_pool_padding.c @@ -30,7 +30,9 @@ int main(void) struct anv_device device = {}; struct anv_state_pool state_pool; + test_device_info_init(&physical_device.info); anv_device_set_physical(&device, &physical_device); + device.kmd_backend = 
anv_kmd_backend_get(INTEL_KMD_TYPE_STUB); pthread_mutex_init(&device.mutex, NULL); anv_bo_cache_init(&device.bo_cache, &device); anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 4096); diff --git a/lib/mesa/src/intel/vulkan/tests/test_common.h b/lib/mesa/src/intel/vulkan/tests/test_common.h index 3f883e3bd..ae84935f3 100644 --- a/lib/mesa/src/intel/vulkan/tests/test_common.h +++ b/lib/mesa/src/intel/vulkan/tests/test_common.h @@ -32,3 +32,8 @@ abort(); \ } \ } while (false) + +static inline void test_device_info_init(struct intel_device_info *info) +{ + info->mem_alignment = 4096; +} diff --git a/lib/mesa/src/intel/vulkan/xe/anv_batch_chain.c b/lib/mesa/src/intel/vulkan/xe/anv_batch_chain.c new file mode 100644 index 000000000..dbcf989d7 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/xe/anv_batch_chain.c @@ -0,0 +1,281 @@ +/* + * Copyright © 2023 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
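Each custom_target in the shaders/meson.build above produces a header defining a SPIR-V word array named by the '--vn' option (for example gfx9_generated_draws_spv_source), which driver code can then wrap into a shader module. A hedged consumption sketch: the header path and array name follow the build rules above, the size helper is illustrative, and the array element type is assumed to be uint32_t.

   #include "shaders/gfx9_generated_draws_spv.h"

   static size_t
   gfx9_generated_draws_spv_size(void)
   {
      return sizeof(gfx9_generated_draws_spv_source); /* size in bytes */
   }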
+ */ + +#include "xe/anv_batch_chain.h" + +#include "anv_private.h" + +#include <xf86drm.h> + +#include "drm-uapi/xe_drm.h" + +VkResult +xe_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo, + uint32_t batch_bo_size) +{ + struct anv_device *device = queue->device; + VkResult result = VK_SUCCESS; + uint32_t syncobj_handle; + + if (drmSyncobjCreate(device->fd, 0, &syncobj_handle)) + return vk_errorf(device, VK_ERROR_UNKNOWN, "Unable to create sync obj"); + + struct drm_xe_sync sync = { + .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, + .handle = syncobj_handle, + }; + struct drm_xe_exec exec = { + .engine_id = queue->engine_id, + .num_batch_buffer = 1, + .address = batch_bo->offset, + .num_syncs = 1, + .syncs = (uintptr_t)&sync, + }; + + if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec)) { + result = vk_device_set_lost(&device->vk, "XE_EXEC failed: %m"); + goto exec_error; + } + + struct drm_syncobj_wait wait = { + .handles = (uintptr_t)&syncobj_handle, + .timeout_nsec = INT64_MAX, + .count_handles = 1, + }; + if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait)) + result = vk_device_set_lost(&device->vk, "DRM_IOCTL_SYNCOBJ_WAIT failed: %m"); + +exec_error: + drmSyncobjDestroy(device->fd, syncobj_handle); + + return result; +} + +#define TYPE_SIGNAL true +#define TYPE_WAIT false + +static void +xe_exec_fill_sync(struct drm_xe_sync *xe_sync, struct vk_sync *vk_sync, + uint64_t value, bool signal) +{ + if (unlikely(!vk_sync_type_is_drm_syncobj(vk_sync->type))) { + unreachable("Unsupported sync type"); + return; + } + + const struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(vk_sync); + xe_sync->handle = syncobj->syncobj; + + if (value) { + xe_sync->flags |= DRM_XE_SYNC_TIMELINE_SYNCOBJ; + xe_sync->timeline_value = value; + } else { + xe_sync->flags |= DRM_XE_SYNC_SYNCOBJ; + } + + if (signal) + xe_sync->flags |= DRM_XE_SYNC_SIGNAL; +} + +static VkResult +xe_exec_process_syncs(struct anv_queue *queue, + uint32_t wait_count, const struct vk_sync_wait *waits, + uint32_t signal_count, const struct vk_sync_signal *signals, + struct anv_utrace_submit *utrace_submit, + struct drm_xe_sync **ret, uint32_t *ret_count) +{ + struct anv_device *device = queue->device; + uint32_t num_syncs = wait_count + signal_count + (utrace_submit ? 1 : 0) + + (queue->sync ? 1 : 0); + + if (!num_syncs) + return VK_SUCCESS; + + struct drm_xe_sync *xe_syncs = vk_zalloc(&device->vk.alloc, + sizeof(*xe_syncs) * num_syncs, 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!xe_syncs) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + uint32_t count = 0; + + /* Signal the utrace sync only if it doesn't have a batch. Otherwise the + * it's the utrace batch that should signal its own sync. 
+ */ + if (utrace_submit && !utrace_submit->batch_bo) { + struct drm_xe_sync *xe_sync = &xe_syncs[count++]; + + xe_exec_fill_sync(xe_sync, utrace_submit->sync, 0, TYPE_SIGNAL); + } + + for (uint32_t i = 0; i < wait_count; i++) { + struct drm_xe_sync *xe_sync = &xe_syncs[count++]; + const struct vk_sync_wait *vk_wait = &waits[i]; + + xe_exec_fill_sync(xe_sync, vk_wait->sync, vk_wait->wait_value, + TYPE_WAIT); + } + + for (uint32_t i = 0; i < signal_count; i++) { + struct drm_xe_sync *xe_sync = &xe_syncs[count++]; + const struct vk_sync_signal *vk_signal = &signals[i]; + + xe_exec_fill_sync(xe_sync, vk_signal->sync, vk_signal->signal_value, + TYPE_SIGNAL); + } + + if (queue->sync) { + struct drm_xe_sync *xe_sync = &xe_syncs[count++]; + + xe_exec_fill_sync(xe_sync, queue->sync, 0, + TYPE_SIGNAL); + } + + assert(count == num_syncs); + *ret = xe_syncs; + *ret_count = num_syncs; + return VK_SUCCESS; +} + +static void +xe_exec_print_debug(struct anv_queue *queue, uint32_t cmd_buffer_count, + struct anv_cmd_buffer **cmd_buffers, struct anv_query_pool *perf_query_pool, + uint32_t perf_query_pass, struct drm_xe_exec *exec) +{ + if (INTEL_DEBUG(DEBUG_SUBMIT)) + fprintf(stderr, "Batch offset=0x%016"PRIx64" on queue %u\n", + (uint64_t)exec->address, queue->vk.index_in_family); + + anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers, + perf_query_pool, perf_query_pass); +} + +VkResult +xe_queue_exec_utrace_locked(struct anv_queue *queue, + struct anv_utrace_submit *utrace_submit) +{ + struct anv_device *device = queue->device; + struct drm_xe_sync xe_sync = {}; + + xe_exec_fill_sync(&xe_sync, utrace_submit->sync, 0, TYPE_SIGNAL); + +#ifdef SUPPORT_INTEL_INTEGRATED_GPUS + if (device->physical->memory.need_clflush) + intel_flush_range(utrace_submit->batch_bo->map, + utrace_submit->batch_bo->size); +#endif + + struct drm_xe_exec exec = { + .engine_id = queue->engine_id, + .num_batch_buffer = 1, + .syncs = (uintptr_t)&xe_sync, + .num_syncs = 1, + .address = utrace_submit->batch_bo->offset, + }; + if (likely(!device->info->no_hw)) { + if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec)) + return vk_device_set_lost(&device->vk, "anv_xe_queue_exec_locked failed: %m"); + } + + return VK_SUCCESS; +} + +VkResult +xe_queue_exec_locked(struct anv_queue *queue, + uint32_t wait_count, + const struct vk_sync_wait *waits, + uint32_t cmd_buffer_count, + struct anv_cmd_buffer **cmd_buffers, + uint32_t signal_count, + const struct vk_sync_signal *signals, + struct anv_query_pool *perf_query_pool, + uint32_t perf_query_pass) +{ + struct anv_device *device = queue->device; + struct anv_utrace_submit *utrace_submit = NULL; + VkResult result; + + result = anv_device_utrace_flush_cmd_buffers(queue, cmd_buffer_count, + cmd_buffers, &utrace_submit); + if (result != VK_SUCCESS) + return result; + + struct drm_xe_sync *xe_syncs = NULL; + uint32_t xe_syncs_count = 0; + result = xe_exec_process_syncs(queue, wait_count, waits, + signal_count, signals, + utrace_submit, + &xe_syncs, &xe_syncs_count); + if (result != VK_SUCCESS) + return result; + + /* If we have no batch for utrace, just forget about it now. 
*/ + if (utrace_submit && !utrace_submit->batch_bo) + utrace_submit = NULL; + + struct drm_xe_exec exec = { + .engine_id = queue->engine_id, + .num_batch_buffer = 1, + .syncs = (uintptr_t)xe_syncs, + .num_syncs = xe_syncs_count, + }; + + if (cmd_buffer_count) { + anv_cmd_buffer_chain_command_buffers(cmd_buffers, cmd_buffer_count); + +#ifdef SUPPORT_INTEL_INTEGRATED_GPUS + if (device->physical->memory.need_clflush) + anv_cmd_buffer_clflush(cmd_buffers, cmd_buffer_count); +#endif + + struct anv_cmd_buffer *first_cmd_buffer = cmd_buffers[0]; + struct anv_batch_bo *first_batch_bo = list_first_entry(&first_cmd_buffer->batch_bos, + struct anv_batch_bo, link); + exec.address = first_batch_bo->bo->offset; + } else { + exec.address = device->trivial_batch_bo->offset; + } + + xe_exec_print_debug(queue, cmd_buffer_count, cmd_buffers, perf_query_pool, + perf_query_pass, &exec); + + /* TODO: add perfetto stuff when Xe supports it */ + + if (!device->info->no_hw) { + if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec)) + result = vk_device_set_lost(&device->vk, "anv_xe_queue_exec_locked failed: %m"); + } + vk_free(&device->vk.alloc, xe_syncs); + + if (result == VK_SUCCESS && queue->sync) { + result = vk_sync_wait(&device->vk, queue->sync, 0, + VK_SYNC_WAIT_COMPLETE, UINT64_MAX); + if (result != VK_SUCCESS) + result = vk_queue_set_lost(&queue->vk, "sync wait failed"); + } + + if (result == VK_SUCCESS && utrace_submit) + result = xe_queue_exec_utrace_locked(queue, utrace_submit); + + return result; +} diff --git a/lib/mesa/src/intel/vulkan/xe/anv_batch_chain.h b/lib/mesa/src/intel/vulkan/xe/anv_batch_chain.h new file mode 100644 index 000000000..9ee877e04 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/xe/anv_batch_chain.h @@ -0,0 +1,53 @@ +/* + * Copyright © 2023 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
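A hedged recap of the drm_xe_sync encodings that xe_exec_fill_sync() above builds for this snapshot of the Xe uAPI: a binary syncobj uses DRM_XE_SYNC_SYNCOBJ, a timeline point adds DRM_XE_SYNC_TIMELINE_SYNCOBJ together with a value, and DRM_XE_SYNC_SIGNAL marks the entry as a signal rather than a wait. The two example builders are illustrative names.

   static struct drm_xe_sync
   example_binary_signal(uint32_t syncobj_handle)
   {
      return (struct drm_xe_sync) {
         .flags  = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL,
         .handle = syncobj_handle,
      };
   }

   static struct drm_xe_sync
   example_timeline_wait(uint32_t syncobj_handle, uint64_t point)
   {
      return (struct drm_xe_sync) {
         .flags          = DRM_XE_SYNC_TIMELINE_SYNCOBJ,
         .handle         = syncobj_handle,
         .timeline_value = point,
      };
   }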
+ */ + +#pragma once + +#include <stdint.h> + +#include "vulkan/vulkan_core.h" +#include "vk_sync.h" + +struct anv_queue; +struct anv_bo; +struct anv_cmd_buffer; +struct anv_query_pool; +struct anv_utrace_submit; + +VkResult +xe_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo, + uint32_t batch_bo_size); +VkResult +xe_queue_exec_locked(struct anv_queue *queue, + uint32_t wait_count, + const struct vk_sync_wait *waits, + uint32_t cmd_buffer_count, + struct anv_cmd_buffer **cmd_buffers, + uint32_t signal_count, + const struct vk_sync_signal *signals, + struct anv_query_pool *perf_query_pool, + uint32_t perf_query_pass); + +VkResult +xe_queue_exec_utrace_locked(struct anv_queue *queue, + struct anv_utrace_submit *utrace_submit); diff --git a/lib/mesa/src/intel/vulkan/xe/anv_device.c b/lib/mesa/src/intel/vulkan/xe/anv_device.c new file mode 100644 index 000000000..a5827d968 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/xe/anv_device.c @@ -0,0 +1,142 @@ +/* + * Copyright © 2023 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "xe/anv_device.h" +#include "anv_private.h" + +#include "drm-uapi/xe_drm.h" + +bool anv_xe_device_destroy_vm(struct anv_device *device) +{ + struct drm_xe_vm_destroy destroy = { + .vm_id = device->vm_id, + }; + return intel_ioctl(device->fd, DRM_IOCTL_XE_VM_DESTROY, &destroy) == 0; +} + +VkResult anv_xe_device_setup_vm(struct anv_device *device) +{ + struct drm_xe_vm_create create = { + .flags = DRM_XE_VM_CREATE_SCRATCH_PAGE, + }; + if (intel_ioctl(device->fd, DRM_IOCTL_XE_VM_CREATE, &create) != 0) + return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, + "vm creation failed"); + + device->vm_id = create.vm_id; + return VK_SUCCESS; +} + +enum drm_sched_priority +anv_vk_priority_to_drm_sched_priority(VkQueueGlobalPriorityKHR vk_priority) +{ + switch (vk_priority) { + case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR: + return DRM_SCHED_PRIORITY_MIN; + case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR: + return DRM_SCHED_PRIORITY_NORMAL; + case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR: + return DRM_SCHED_PRIORITY_HIGH; + default: + unreachable("Invalid priority"); + return DRM_SCHED_PRIORITY_MIN; + } +} + +static VkQueueGlobalPriorityKHR +drm_sched_priority_to_vk_priority(enum drm_sched_priority drm_sched_priority) +{ + switch (drm_sched_priority) { + case DRM_SCHED_PRIORITY_MIN: + return VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR; + case DRM_SCHED_PRIORITY_NORMAL: + return VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR; + case DRM_SCHED_PRIORITY_HIGH: + return VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR; + default: + unreachable("Invalid drm_sched_priority"); + return VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR; + } +} + +static void * +xe_query_alloc_fetch(struct anv_physical_device *device, uint32_t query_id) +{ + struct drm_xe_device_query query = { + .query = query_id, + }; + if (intel_ioctl(device->local_fd, DRM_IOCTL_XE_DEVICE_QUERY, &query)) + return NULL; + + void *data = calloc(1, query.size); + if (!data) + return NULL; + + query.data = (uintptr_t)data; + if (intel_ioctl(device->local_fd, DRM_IOCTL_XE_DEVICE_QUERY, &query)) { + free(data); + return NULL; + } + + return data; +} + +VkResult +anv_xe_physical_device_get_parameters(struct anv_physical_device *device) +{ + struct drm_xe_query_config *config; + + config = xe_query_alloc_fetch(device, DRM_XE_DEVICE_QUERY_CONFIG); + if (!config) + return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, + "unable to query device config"); + + device->has_exec_timeline = true; + device->max_context_priority = + drm_sched_priority_to_vk_priority(config->info[XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY]); + + free(config); + return VK_SUCCESS; +} + +VkResult +anv_xe_device_check_status(struct vk_device *vk_device) +{ + struct anv_device *device = container_of(vk_device, struct anv_device, vk); + VkResult result = VK_SUCCESS; + + for (uint32_t i = 0; i < device->queue_count; i++) { + struct drm_xe_engine_get_property engine_get_property = { + .engine_id = device->queues[i].engine_id, + .property = XE_ENGINE_GET_PROPERTY_BAN, + }; + int ret = intel_ioctl(device->fd, DRM_IOCTL_XE_ENGINE_GET_PROPERTY, + &engine_get_property); + + if (ret || engine_get_property.value) { + result = vk_device_set_lost(&device->vk, "One or more queues banned"); + break; + } + } + + return result; +} diff --git a/lib/mesa/src/intel/vulkan/xe/anv_device.h b/lib/mesa/src/intel/vulkan/xe/anv_device.h new file mode 100644 index 000000000..669d5639c --- /dev/null +++ b/lib/mesa/src/intel/vulkan/xe/anv_device.h @@ -0,0 +1,42 @@ +/* + * Copyright © 2023 Intel Corporation + * + * Permission is hereby granted, free of charge, to any 
person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once + +#include <stdbool.h> + +#include "vulkan/vulkan_core.h" +#include "vk_device.h" + +#include "drm-uapi/gpu_scheduler.h" + +struct anv_device; +struct anv_physical_device; + +bool anv_xe_device_destroy_vm(struct anv_device *device); +VkResult anv_xe_device_setup_vm(struct anv_device *device); +VkResult anv_xe_device_check_status(struct vk_device *vk_device); + +VkResult +anv_xe_physical_device_get_parameters(struct anv_physical_device *device); +enum drm_sched_priority +anv_vk_priority_to_drm_sched_priority(VkQueueGlobalPriorityKHR vk_priority); diff --git a/lib/mesa/src/intel/vulkan/xe/anv_kmd_backend.c b/lib/mesa/src/intel/vulkan/xe/anv_kmd_backend.c new file mode 100644 index 000000000..46c4939e4 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/xe/anv_kmd_backend.c @@ -0,0 +1,149 @@ +/* + * Copyright © 2023 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <sys/mman.h> +#include <xf86drm.h> + +#include "anv_private.h" + +#include "xe/anv_batch_chain.h" + +#include "drm-uapi/xe_drm.h" + +static uint32_t +xe_gem_create(struct anv_device *device, + const struct intel_memory_class_instance **regions, + uint16_t regions_count, uint64_t size, + enum anv_bo_alloc_flags alloc_flags, + uint64_t *actual_size) +{ + struct drm_xe_gem_create gem_create = { + /* From xe_drm.h: If a VM is specified, this BO must: + * 1. Only ever be bound to that VM. + * 2. 
Cannot be exported as a PRIME fd. + */ + .vm_id = alloc_flags & ANV_BO_ALLOC_EXTERNAL ? 0 : device->vm_id, + .size = align64(size, device->info->mem_alignment), + .flags = alloc_flags & ANV_BO_ALLOC_SCANOUT ? XE_GEM_CREATE_FLAG_SCANOUT : 0, + }; + for (uint16_t i = 0; i < regions_count; i++) + gem_create.flags |= BITFIELD_BIT(regions[i]->instance); + + if (intel_ioctl(device->fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create)) + return 0; + + *actual_size = gem_create.size; + return gem_create.handle; +} + +static void +xe_gem_close(struct anv_device *device, uint32_t handle) +{ + struct drm_gem_close close = { + .handle = handle, + }; + intel_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +static void * +xe_gem_mmap(struct anv_device *device, struct anv_bo *bo, uint64_t offset, + uint64_t size, VkMemoryPropertyFlags property_flags) +{ + struct drm_xe_gem_mmap_offset args = { + .handle = bo->gem_handle, + }; + if (intel_ioctl(device->fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &args)) + return MAP_FAILED; + + return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + device->fd, args.offset); +} + +static inline int +xe_gem_vm_bind_op(struct anv_device *device, struct anv_bo *bo, uint32_t op) +{ + uint32_t syncobj_handle; + int ret = drmSyncobjCreate(device->fd, 0, &syncobj_handle); + + if (ret) + return ret; + + struct drm_xe_sync sync = { + .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, + .handle = syncobj_handle, + }; + struct drm_xe_vm_bind args = { + .vm_id = device->vm_id, + .num_binds = 1, + .bind.obj = op == XE_VM_BIND_OP_UNMAP ? 0 : bo->gem_handle, + .bind.obj_offset = 0, + .bind.range = bo->actual_size, + .bind.addr = intel_48b_address(bo->offset), + .bind.op = op, + .num_syncs = 1, + .syncs = (uintptr_t)&sync, + }; + ret = intel_ioctl(device->fd, DRM_IOCTL_XE_VM_BIND, &args); + if (ret) + goto bind_error; + + struct drm_syncobj_wait wait = { + .handles = (uintptr_t)&syncobj_handle, + .timeout_nsec = INT64_MAX, + .count_handles = 1, + .flags = 0, + .first_signaled = 0, + .pad = 0, + }; + intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait); + +bind_error: + drmSyncobjDestroy(device->fd, syncobj_handle); + return ret; +} + +static int xe_gem_vm_bind(struct anv_device *device, struct anv_bo *bo) +{ + return xe_gem_vm_bind_op(device, bo, XE_VM_BIND_OP_MAP); +} + +static int xe_gem_vm_unbind(struct anv_device *device, struct anv_bo *bo) +{ + return xe_gem_vm_bind_op(device, bo, XE_VM_BIND_OP_UNMAP); +} + +const struct anv_kmd_backend * +anv_xe_kmd_backend_get(void) +{ + static const struct anv_kmd_backend xe_backend = { + .gem_create = xe_gem_create, + .gem_close = xe_gem_close, + .gem_mmap = xe_gem_mmap, + .gem_vm_bind = xe_gem_vm_bind, + .gem_vm_unbind = xe_gem_vm_unbind, + .execute_simple_batch = xe_execute_simple_batch, + .queue_exec_locked = xe_queue_exec_locked, + .queue_exec_trace = xe_queue_exec_utrace_locked, + }; + return &xe_backend; +} diff --git a/lib/mesa/src/intel/vulkan/xe/anv_queue.c b/lib/mesa/src/intel/vulkan/xe/anv_queue.c new file mode 100644 index 000000000..5c42435c7 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/xe/anv_queue.c @@ -0,0 +1,123 @@ +/* + * Copyright © 2023 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom 
the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "xe/anv_queue.h" + +#include "anv_private.h" + +#include "common/xe/intel_engine.h" +#include "common/intel_gem.h" + +#include "xe/anv_device.h" + +#include "drm-uapi/xe_drm.h" +#include "drm-uapi/gpu_scheduler.h" + +VkResult +anv_xe_create_engine(struct anv_device *device, + struct anv_queue *queue, + const VkDeviceQueueCreateInfo *pCreateInfo) +{ + struct anv_physical_device *physical = device->physical; + struct anv_queue_family *queue_family = + &physical->queue.families[pCreateInfo->queueFamilyIndex]; + const struct intel_query_engine_info *engines = physical->engine_info; + struct drm_xe_engine_class_instance *instances; + + instances = vk_alloc(&device->vk.alloc, + sizeof(*instances) * queue_family->queueCount, 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!instances) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + /* Build a list of all compatible HW engines */ + uint32_t count = 0; + for (uint32_t i = 0; i < engines->num_engines; i++) { + const struct intel_engine_class_instance engine = engines->engines[i]; + if (engine.engine_class != queue_family->engine_class) + continue; + + instances[count].engine_class = intel_engine_class_to_xe(engine.engine_class); + instances[count].engine_instance = engine.engine_instance; + /* TODO: handle gt_id; MTL and newer platforms will have media engines + * in a separate gt + */ + instances[count++].gt_id = 0; + } + + assert(device->vm_id != 0); + struct drm_xe_engine_create create = { + /* Allows KMD to pick one of those engines for the submission queue */ + .instances = (uintptr_t)instances, + .vm_id = device->vm_id, + .width = 1, + .num_placements = count, + }; + int ret = intel_ioctl(device->fd, DRM_IOCTL_XE_ENGINE_CREATE, &create); + vk_free(&device->vk.alloc, instances); + if (ret) + return vk_errorf(device, VK_ERROR_UNKNOWN, "Unable to create engine"); + + queue->engine_id = create.engine_id; + + const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority = + vk_find_struct_const(pCreateInfo->pNext, + DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR); + const VkQueueGlobalPriorityKHR priority = queue_priority ? + queue_priority->globalPriority : + VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR; + + /* As per spec, the driver implementation may deny requests to acquire + * a priority above the default priority (MEDIUM) if the caller does not + * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR + * is returned.
+ */ + if (physical->max_context_priority >= VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) { + if (priority > physical->max_context_priority) + goto priority_error; + + struct drm_xe_engine_set_property engine_property = { + .engine_id = create.engine_id, + .property = XE_ENGINE_SET_PROPERTY_PRIORITY, + .value = anv_vk_priority_to_drm_sched_priority(priority), + }; + ret = intel_ioctl(device->fd, DRM_IOCTL_XE_ENGINE_SET_PROPERTY, + &engine_property); + if (ret && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) + goto priority_error; + } + + return VK_SUCCESS; + +priority_error: + anv_xe_destroy_engine(device, queue); + return vk_error(device, VK_ERROR_NOT_PERMITTED_KHR); +} + +void +anv_xe_destroy_engine(struct anv_device *device, struct anv_queue *queue) +{ + struct drm_xe_engine_destroy destroy = { + .engine_id = queue->engine_id, + }; + intel_ioctl(device->fd, DRM_IOCTL_XE_ENGINE_DESTROY, &destroy); +} diff --git a/lib/mesa/src/intel/vulkan/xe/anv_queue.h b/lib/mesa/src/intel/vulkan/xe/anv_queue.h new file mode 100644 index 000000000..646f0ef2f --- /dev/null +++ b/lib/mesa/src/intel/vulkan/xe/anv_queue.h @@ -0,0 +1,35 @@ +/* + * Copyright © 2023 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once + +#include "vulkan/vulkan_core.h" + +struct anv_device; +struct anv_queue; + +VkResult +anv_xe_create_engine(struct anv_device *device, + struct anv_queue *queue, + const VkDeviceQueueCreateInfo *pCreateInfo); +void +anv_xe_destroy_engine(struct anv_device *device, struct anv_queue *queue);
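A note on the pattern used by xe_gem_vm_bind_op() in anv_kmd_backend.c above: Xe's VM bind is asynchronous, so the driver attaches a freshly created syncobj as the signal sync of the bind ioctl and then blocks on it, making the bind synchronous from anv's point of view. The standalone sketch below (not Mesa code) shows just that syncobj create/wait/destroy skeleton using libdrm's generic helpers; the DRM_IOCTL_XE_VM_BIND step is stood in for by drmSyncobjSignal() so the example is self-contained, and the render-node path is an assumption about the local machine.

/*
 * Minimal sketch of the wait-on-syncobj pattern from xe_gem_vm_bind_op().
 * Build with: cc syncobj_demo.c $(pkg-config --cflags --libs libdrm)
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <xf86drm.h>

int main(void)
{
   /* Any DRM render node works for plain syncobj operations; this path is
    * an assumption about the system.
    */
   int fd = open("/dev/dri/renderD128", O_RDWR);
   if (fd < 0) {
      perror("open");
      return 1;
   }

   uint32_t syncobj;
   if (drmSyncobjCreate(fd, 0, &syncobj)) {
      close(fd);
      return 1;
   }

   /* In the driver, DRM_IOCTL_XE_VM_BIND is issued here with a drm_xe_sync
    * that signals `syncobj` once the bind completes.  We signal it directly
    * so the wait below returns.
    */
   drmSyncobjSignal(fd, &syncobj, 1);

   /* Block until the "bind" has signalled, mirroring the
    * DRM_IOCTL_SYNCOBJ_WAIT call in xe_gem_vm_bind_op().
    */
   int ret = drmSyncobjWait(fd, &syncobj, 1, INT64_MAX,
                            DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
   printf("syncobj wait returned %d\n", ret);

   drmSyncobjDestroy(fd, syncobj);
   close(fd);
   return ret ? 1 : 0;
}

The upside of waiting inline like this is that callers of gem_vm_bind/gem_vm_unbind never observe a partially bound BO; the cost is one syncobj round trip per bind or unbind.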