summaryrefslogtreecommitdiff
path: root/lib/mesa/src/intel/vulkan
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mesa/src/intel/vulkan')
-rw-r--r--lib/mesa/src/intel/vulkan/anv_android.c165
-rw-r--r--lib/mesa/src/intel/vulkan/anv_android.h8
-rw-r--r--lib/mesa/src/intel/vulkan/anv_android_stubs.c12
-rw-r--r--lib/mesa/src/intel/vulkan/anv_bo_sync.c2
-rw-r--r--lib/mesa/src/intel/vulkan/anv_generated_indirect_draws.c352
-rw-r--r--lib/mesa/src/intel/vulkan/anv_generated_indirect_draws.h87
-rw-r--r--lib/mesa/src/intel/vulkan/anv_kmd_backend.c42
-rw-r--r--lib/mesa/src/intel/vulkan/anv_kmd_backend.h80
-rw-r--r--lib/mesa/src/intel/vulkan/anv_mesh_perprim_wa.c557
-rw-r--r--lib/mesa/src/intel/vulkan/anv_nir_compute_push_layout.c11
-rw-r--r--lib/mesa/src/intel/vulkan/anv_nir_lower_ubo_loads.c2
-rw-r--r--lib/mesa/src/intel/vulkan/anv_perf.c14
-rw-r--r--lib/mesa/src/intel/vulkan/anv_utrace.c276
-rw-r--r--lib/mesa/src/intel/vulkan/anv_video.c267
-rw-r--r--lib/mesa/src/intel/vulkan/genX_acceleration_structure.c140
-rw-r--r--lib/mesa/src/intel/vulkan/genX_cmd_draw_generated_indirect.h750
-rw-r--r--lib/mesa/src/intel/vulkan/genX_cmd_draw_helpers.h154
-rw-r--r--lib/mesa/src/intel/vulkan/genX_video.c447
-rw-r--r--lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c77
-rw-r--r--lib/mesa/src/intel/vulkan/grl/genX_grl.h11
-rw-r--r--lib/mesa/src/intel/vulkan/grl/genX_grl_dispatch.c17
-rw-r--r--lib/mesa/src/intel/vulkan/grl/genX_grl_uuid.cpp5
-rw-r--r--lib/mesa/src/intel/vulkan/grl/grl_cl_kernel_gen.py10
-rw-r--r--lib/mesa/src/intel/vulkan/grl/grl_metakernel_gen.py2
-rw-r--r--lib/mesa/src/intel/vulkan/grl/meson.build6
-rw-r--r--lib/mesa/src/intel/vulkan/i915/anv_batch_chain.c813
-rw-r--r--lib/mesa/src/intel/vulkan/i915/anv_batch_chain.h53
-rw-r--r--lib/mesa/src/intel/vulkan/i915/anv_device.c244
-rw-r--r--lib/mesa/src/intel/vulkan/i915/anv_device.h39
-rw-r--r--lib/mesa/src/intel/vulkan/i915/anv_kmd_backend.c184
-rw-r--r--lib/mesa/src/intel/vulkan/layers/anv_android_layer.c46
-rw-r--r--lib/mesa/src/intel/vulkan/layers/anv_doom64.c134
-rw-r--r--lib/mesa/src/intel/vulkan/meson.build93
-rw-r--r--lib/mesa/src/intel/vulkan/shaders/common_generated_draws.glsl133
-rw-r--r--lib/mesa/src/intel/vulkan/shaders/gfx11_generated_draws.glsl85
-rw-r--r--lib/mesa/src/intel/vulkan/shaders/gfx9_generated_draws.glsl144
-rw-r--r--lib/mesa/src/intel/vulkan/shaders/meson.build56
-rw-r--r--lib/mesa/src/intel/vulkan/tests/block_pool_grow_first.c2
-rw-r--r--lib/mesa/src/intel/vulkan/tests/state_pool_padding.c2
-rw-r--r--lib/mesa/src/intel/vulkan/tests/test_common.h5
-rw-r--r--lib/mesa/src/intel/vulkan/xe/anv_batch_chain.c281
-rw-r--r--lib/mesa/src/intel/vulkan/xe/anv_batch_chain.h53
-rw-r--r--lib/mesa/src/intel/vulkan/xe/anv_device.c142
-rw-r--r--lib/mesa/src/intel/vulkan/xe/anv_device.h42
-rw-r--r--lib/mesa/src/intel/vulkan/xe/anv_kmd_backend.c149
-rw-r--r--lib/mesa/src/intel/vulkan/xe/anv_queue.c123
-rw-r--r--lib/mesa/src/intel/vulkan/xe/anv_queue.h35
47 files changed, 5966 insertions, 386 deletions
diff --git a/lib/mesa/src/intel/vulkan/anv_android.c b/lib/mesa/src/intel/vulkan/anv_android.c
index 8a17f0a24..6e98763dd 100644
--- a/lib/mesa/src/intel/vulkan/anv_android.c
+++ b/lib/mesa/src/intel/vulkan/anv_android.c
@@ -34,17 +34,14 @@
#include <sync/sync.h>
#include "anv_private.h"
+#include "vk_android.h"
#include "vk_common_entrypoints.h"
#include "vk_util.h"
static int anv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev);
static int anv_hal_close(struct hw_device_t *dev);
-static void UNUSED
-static_asserts(void)
-{
- STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC);
-}
+static_assert(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC, "");
PUBLIC struct hwvulkan_module_t HAL_MODULE_INFO_SYM = {
.common = {
@@ -142,8 +139,8 @@ vk_format_from_android(unsigned android_format, unsigned android_usage)
}
}
-static inline unsigned
-android_format_from_vk(unsigned vk_format)
+unsigned
+anv_ahb_format_for_vk_format(VkFormat vk_format)
{
switch (vk_format) {
case VK_FORMAT_R8G8B8A8_UNORM:
@@ -167,12 +164,6 @@ android_format_from_vk(unsigned vk_format)
}
}
-static VkFormatFeatureFlags
-features2_to_features(VkFormatFeatureFlags2 features2)
-{
- return features2 & VK_ALL_FORMAT_FEATURE_FLAG_BITS;
-}
-
static VkResult
get_ahw_buffer_format_properties2(
VkDevice device_h,
@@ -201,9 +192,9 @@ get_ahw_buffer_format_properties2(
VkAndroidHardwareBufferFormatProperties2ANDROID *p = pProperties;
p->format = vk_format_from_android(desc.format, desc.usage);
+ p->externalFormat = p->format;
const struct anv_format *anv_format = anv_get_format(p->format);
- p->externalFormat = (uint64_t) (uintptr_t) anv_format;
/* Default to OPTIMAL tiling but set to linear in case
* of AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER usage.
@@ -214,7 +205,7 @@ get_ahw_buffer_format_properties2(
tiling = VK_IMAGE_TILING_LINEAR;
p->formatFeatures =
- anv_get_image_format_features2(device->info, p->format, anv_format,
+ anv_get_image_format_features2(device->physical, p->format, anv_format,
tiling, NULL);
/* "Images can be created with an external format even if the Android hardware
@@ -274,7 +265,7 @@ anv_GetAndroidHardwareBufferPropertiesANDROID(
format_prop->format = format_prop2.format;
format_prop->externalFormat = format_prop2.externalFormat;
format_prop->formatFeatures =
- features2_to_features(format_prop2.formatFeatures);
+ vk_format_features2_to_features(format_prop2.formatFeatures);
format_prop->samplerYcbcrConversionComponents =
format_prop2.samplerYcbcrConversionComponents;
format_prop->suggestedYcbcrModel = format_prop2.suggestedYcbcrModel;
@@ -309,81 +300,21 @@ anv_GetAndroidHardwareBufferPropertiesANDROID(
return VK_SUCCESS;
}
-VkResult
-anv_GetMemoryAndroidHardwareBufferANDROID(
- VkDevice device_h,
- const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo,
- struct AHardwareBuffer **pBuffer)
-{
- ANV_FROM_HANDLE(anv_device_memory, mem, pInfo->memory);
-
- /* Some quotes from Vulkan spec:
- *
- * "If the device memory was created by importing an Android hardware
- * buffer, vkGetMemoryAndroidHardwareBufferANDROID must return that same
- * Android hardware buffer object."
- *
- * "VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID must
- * have been included in VkExportMemoryAllocateInfo::handleTypes when
- * memory was created."
- */
- if (mem->ahw) {
- *pBuffer = mem->ahw;
- /* Increase refcount. */
- AHardwareBuffer_acquire(mem->ahw);
- return VK_SUCCESS;
- }
-
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-}
-
-#endif
-
-/* Construct ahw usage mask from image usage bits, see
- * 'AHardwareBuffer Usage Equivalence' in Vulkan spec.
- */
-uint64_t
-anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create,
- const VkImageUsageFlags vk_usage)
-{
- uint64_t ahw_usage = 0;
-#if ANDROID_API_LEVEL >= 26
- if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT)
- ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE;
-
- if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)
- ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE;
-
- if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
- ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT;
-
- if (vk_create & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)
- ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP;
-
- if (vk_create & VK_IMAGE_CREATE_PROTECTED_BIT)
- ahw_usage |= AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT;
-
- /* No usage bits set - set at least one GPU usage. */
- if (ahw_usage == 0)
- ahw_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE;
#endif
- return ahw_usage;
-}
/*
* Called from anv_AllocateMemory when import AHardwareBuffer.
*/
VkResult
anv_import_ahw_memory(VkDevice device_h,
- struct anv_device_memory *mem,
- const VkImportAndroidHardwareBufferInfoANDROID *info)
+ struct anv_device_memory *mem)
{
#if ANDROID_API_LEVEL >= 26
ANV_FROM_HANDLE(anv_device, device, device_h);
/* Import from AHardwareBuffer to anv_device_memory. */
const native_handle_t *handle =
- AHardwareBuffer_getNativeHandle(info->buffer);
+ AHardwareBuffer_getNativeHandle(mem->vk.ahardware_buffer);
/* NOTE - We support buffers with only one handle but do not error on
* multiple handle case. Reason is that we want to support YUV formats
@@ -399,14 +330,6 @@ anv_import_ahw_memory(VkDevice device_h,
&mem->bo);
assert(result == VK_SUCCESS);
- /* "If the vkAllocateMemory command succeeds, the implementation must
- * acquire a reference to the imported hardware buffer, which it must
- * release when the device memory object is freed. If the command fails,
- * the implementation must not retain a reference."
- */
- AHardwareBuffer_acquire(info->buffer);
- mem->ahw = info->buffer;
-
return VK_SUCCESS;
#else
return VK_ERROR_EXTENSION_NOT_PRESENT;
@@ -414,70 +337,6 @@ anv_import_ahw_memory(VkDevice device_h,
}
VkResult
-anv_create_ahw_memory(VkDevice device_h,
- struct anv_device_memory *mem,
- const VkMemoryAllocateInfo *pAllocateInfo)
-{
-#if ANDROID_API_LEVEL >= 26
- const VkMemoryDedicatedAllocateInfo *dedicated_info =
- vk_find_struct_const(pAllocateInfo->pNext,
- MEMORY_DEDICATED_ALLOCATE_INFO);
-
- uint32_t w = 0;
- uint32_t h = 1;
- uint32_t layers = 1;
- uint32_t format = 0;
- uint64_t usage = 0;
-
- /* If caller passed dedicated information. */
- if (dedicated_info && dedicated_info->image) {
- ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
- w = image->vk.extent.width;
- h = image->vk.extent.height;
- layers = image->vk.array_layers;
- format = android_format_from_vk(image->vk.format);
- usage = anv_ahw_usage_from_vk_usage(image->vk.create_flags, image->vk.usage);
- } else if (dedicated_info && dedicated_info->buffer) {
- ANV_FROM_HANDLE(anv_buffer, buffer, dedicated_info->buffer);
- w = buffer->vk.size;
- format = AHARDWAREBUFFER_FORMAT_BLOB;
- usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN |
- AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
- } else {
- w = pAllocateInfo->allocationSize;
- format = AHARDWAREBUFFER_FORMAT_BLOB;
- usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN |
- AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
- }
-
- struct AHardwareBuffer *ahw = NULL;
- struct AHardwareBuffer_Desc desc = {
- .width = w,
- .height = h,
- .layers = layers,
- .format = format,
- .usage = usage,
- };
-
- if (AHardwareBuffer_allocate(&desc, &ahw) != 0)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- const VkImportAndroidHardwareBufferInfoANDROID import_info = {
- .buffer = ahw,
- };
- VkResult result = anv_import_ahw_memory(device_h, mem, &import_info);
-
- /* Release a reference to avoid leak for AHB allocation. */
- AHardwareBuffer_release(ahw);
-
- return result;
-#else
- return VK_ERROR_EXTENSION_NOT_PRESENT;
-#endif
-
-}
-
-VkResult
anv_image_init_from_gralloc(struct anv_device *device,
struct anv_image *image,
const VkImageCreateInfo *base_info,
@@ -536,6 +395,8 @@ anv_image_init_from_gralloc(struct anv_device *device,
base_info->tiling);
assert(format != ISL_FORMAT_UNSUPPORTED);
+ anv_info.stride = gralloc_info->stride * (isl_format_get_layout(format)->bpb / 8);
+
result = anv_image_init(device, image, &anv_info);
if (result != VK_SUCCESS)
goto fail_init;
@@ -548,8 +409,8 @@ anv_image_init_from_gralloc(struct anv_device *device,
&mem_reqs);
VkDeviceSize aligned_image_size =
- align_u64(mem_reqs.memoryRequirements.size,
- mem_reqs.memoryRequirements.alignment);
+ align64(mem_reqs.memoryRequirements.size,
+ mem_reqs.memoryRequirements.alignment);
if (bo->size < aligned_image_size) {
result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
diff --git a/lib/mesa/src/intel/vulkan/anv_android.h b/lib/mesa/src/intel/vulkan/anv_android.h
index 4490d3b24..e1f099e1f 100644
--- a/lib/mesa/src/intel/vulkan/anv_android.h
+++ b/lib/mesa/src/intel/vulkan/anv_android.h
@@ -44,14 +44,12 @@ VkResult anv_image_bind_from_gralloc(struct anv_device *device,
struct anv_image *image,
const VkNativeBufferANDROID *gralloc_info);
-uint64_t anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create,
- const VkImageUsageFlags vk_usage);
+unsigned anv_ahb_format_for_vk_format(VkFormat vk_format);
VkResult anv_import_ahw_memory(VkDevice device_h,
- struct anv_device_memory *mem,
- const VkImportAndroidHardwareBufferInfoANDROID *info);
+ struct anv_device_memory *mem);
VkResult anv_create_ahw_memory(VkDevice device_h,
struct anv_device_memory *mem,
- const VkMemoryAllocateInfo *pAllocateInfo);
+ const VkMemoryDedicatedAllocateInfo *dedicated_info);
#endif /* ANV_ANDROID_H */
diff --git a/lib/mesa/src/intel/vulkan/anv_android_stubs.c b/lib/mesa/src/intel/vulkan/anv_android_stubs.c
index d5bc11949..4e8c05f57 100644
--- a/lib/mesa/src/intel/vulkan/anv_android_stubs.c
+++ b/lib/mesa/src/intel/vulkan/anv_android_stubs.c
@@ -39,17 +39,9 @@ VkResult anv_image_bind_from_gralloc(struct anv_device *device,
return VK_ERROR_EXTENSION_NOT_PRESENT;
}
-uint64_t
-anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create,
- const VkImageUsageFlags vk_usage)
-{
- return 0;
-}
-
VkResult
anv_import_ahw_memory(VkDevice device_h,
- struct anv_device_memory *mem,
- const VkImportAndroidHardwareBufferInfoANDROID *info)
+ struct anv_device_memory *mem)
{
return VK_ERROR_EXTENSION_NOT_PRESENT;
}
@@ -57,7 +49,7 @@ anv_import_ahw_memory(VkDevice device_h,
VkResult
anv_create_ahw_memory(VkDevice device_h,
struct anv_device_memory *mem,
- const VkMemoryAllocateInfo *pAllocateInfo)
+ const VkMemoryDedicatedAllocateInfo *dedicated_info)
{
return VK_ERROR_EXTENSION_NOT_PRESENT;
}
diff --git a/lib/mesa/src/intel/vulkan/anv_bo_sync.c b/lib/mesa/src/intel/vulkan/anv_bo_sync.c
index 149ae2c2b..c48d52d28 100644
--- a/lib/mesa/src/intel/vulkan/anv_bo_sync.c
+++ b/lib/mesa/src/intel/vulkan/anv_bo_sync.c
@@ -24,6 +24,7 @@
#include "anv_private.h"
#include "util/os_time.h"
+#include "util/perf/cpu_trace.h"
static struct anv_bo_sync *
to_anv_bo_sync(struct vk_sync *sync)
@@ -105,6 +106,7 @@ anv_bo_sync_wait(struct vk_device *vk_device,
{
struct anv_device *device = container_of(vk_device, struct anv_device, vk);
VkResult result;
+ MESA_TRACE_FUNC();
uint32_t pending = wait_count;
while (pending) {
diff --git a/lib/mesa/src/intel/vulkan/anv_generated_indirect_draws.c b/lib/mesa/src/intel/vulkan/anv_generated_indirect_draws.c
new file mode 100644
index 000000000..003dbc88c
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/anv_generated_indirect_draws.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_private.h"
+
+#include "compiler/brw_compiler.h"
+#include "compiler/brw_nir.h"
+#include "compiler/spirv/nir_spirv.h"
+#include "dev/intel_debug.h"
+#include "util/macros.h"
+
+#include "anv_generated_indirect_draws.h"
+
+#include "shaders/gfx9_generated_draws_spv.h"
+#include "shaders/gfx11_generated_draws_spv.h"
+
+/* This pass takes vulkan descriptor bindings 0, 1 & 2 and turns them into
+ * global 64bit addresses. Binding 3 is left as a UBO that would normally be
+ * accessed through the binding table but is fully promoted to push constants.
+ *
+ * As a result we're not using the binding table at all, which is nice because
+ * the side command buffer we use for the generating shader does not interact
+ * with the binding table allocation.
+ */
+
+/* Load the 64bit address stored at byte offset addr_offset of the params UBO
+ * (index 2) and return the vec4 (addr_lo, addr_hi, 0, 0) that replaces the
+ * descriptor of an address binding.
+ */
+static nir_ssa_def *
+load_param_addr_descriptor(nir_builder *b, unsigned addr_offset)
+{
+   nir_ssa_def *addr =
+      nir_load_ubo(b, 1, 64,
+                   nir_imm_int(b, 2),
+                   nir_imm_int(b, addr_offset),
+                   .align_mul = 8,
+                   .align_offset = 0,
+                   .range_base = 0,
+                   .range = ~0);
+
+   return nir_vec4(b,
+                   nir_unpack_64_2x32_split_x(b, addr),
+                   nir_unpack_64_2x32_split_y(b, addr),
+                   nir_imm_int(b, 0),
+                   nir_imm_int(b, 0));
+}
+
+/* nir_shader_instructions_pass() callback: replace the result of one
+ * load_vulkan_descriptor intrinsic as described at the top of this pass.
+ *
+ * Returns false, leaving the instruction untouched, for anything that is not
+ * a load_vulkan_descriptor intrinsic and true once the uses of the intrinsic
+ * have been rewritten. cb_data is unused.
+ */
+static bool
+lower_vulkan_descriptors_instr(nir_builder *b, nir_instr *instr, void *cb_data)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   if (intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
+      return false;
+
+   /* The descriptor is expected to come straight from a
+    * vulkan_resource_index intrinsic, which carries the binding number.
+    */
+   nir_instr *res_index_instr = intrin->src[0].ssa->parent_instr;
+   assert(res_index_instr->type == nir_instr_type_intrinsic);
+   nir_intrinsic_instr *res_index_intrin =
+      nir_instr_as_intrinsic(res_index_instr);
+   assert(res_index_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
+
+   /* New instructions go right after the descriptor load being replaced. */
+   b->cursor = nir_after_instr(instr);
+
+   nir_ssa_def *desc_value = NULL;
+   switch (nir_intrinsic_binding(res_index_intrin)) {
+   case 0:
+      desc_value =
+         load_param_addr_descriptor(
+            b, offsetof(struct anv_generated_indirect_params,
+                        indirect_data_addr));
+      break;
+
+   case 1:
+      desc_value =
+         load_param_addr_descriptor(
+            b, offsetof(struct anv_generated_indirect_params,
+                        generated_cmds_addr));
+      break;
+
+   case 2:
+      desc_value =
+         load_param_addr_descriptor(
+            b, offsetof(struct anv_generated_indirect_params,
+                        draw_ids_addr));
+      break;
+
+   case 3:
+      /* The params UBO itself: (index 2, offset 0). */
+      desc_value = nir_vec2(b, nir_imm_int(b, 2), nir_imm_int(b, 0));
+      break;
+
+   default:
+      /* Without this, an unknown binding would hand a NULL value to
+       * nir_ssa_def_rewrite_uses() below.
+       */
+      unreachable("Unexpected binding in generated draws shader");
+   }
+
+   /* The now-unused intrinsic itself is not removed here. */
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc_value);
+   return true;
+}
+
+/* Run lower_vulkan_descriptors_instr() on every instruction of shader. */
+static bool
+lower_vulkan_descriptors(nir_shader *shader)
+{
+   const nir_metadata preserved =
+      nir_metadata_block_index | nir_metadata_dominance;
+   return nir_shader_instructions_pass(shader, lower_vulkan_descriptors_instr,
+                                       preserved, NULL);
+}
+
+/* Compile an internal fragment shader from SPIR-V and upload it to the
+ * device's internal cache under key. sends_count_expectation is the send
+ * count expected in each SIMD8/SIMD16 variant (twice that for SIMD32); it is
+ * only verified through assert().
+ * Returns whatever anv_device_upload_kernel() returns, or NULL if the SPIR-V
+ * translation or the compilation failed.
+ */
+static struct anv_shader_bin *
+compile_upload_spirv(struct anv_device *device,
+                     const void *key, uint32_t key_size,
+                     const uint32_t *spirv_source, uint32_t spirv_source_size,
+                     uint32_t sends_count_expectation)
+{
+   struct spirv_to_nir_options spirv_options = {
+      .caps = { .int64 = true },
+      .ubo_addr_format = nir_address_format_32bit_index_offset,
+      .ssbo_addr_format = nir_address_format_64bit_global_32bit_offset,
+      .environment = NIR_SPIRV_VULKAN,
+      .create_library = false,
+   };
+   const nir_shader_compiler_options *nir_options =
+      device->physical->compiler->nir_options[MESA_SHADER_FRAGMENT];
+
+   nir_shader *nir =
+      spirv_to_nir(spirv_source, spirv_source_size, NULL, 0,
+                   MESA_SHADER_FRAGMENT, "main", &spirv_options, nir_options);
+
+   /* Don't dereference a NULL shader when assert() is compiled out. */
+   assert(nir != NULL);
+   if (nir == NULL)
+      return NULL;
+
+   nir->info.internal = true;
+   nir_validate_shader(nir, "after spirv_to_nir");
+   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
+
+   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
+   NIR_PASS_V(nir, nir_lower_returns);
+   NIR_PASS_V(nir, nir_inline_functions);
+   NIR_PASS_V(nir, nir_opt_deref);
+
+   /* Pick off the single entrypoint that we want */
+   nir_remove_non_entrypoints(nir);
+
+   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+   NIR_PASS_V(nir, nir_copy_prop);
+   NIR_PASS_V(nir, nir_opt_dce);
+   NIR_PASS_V(nir, nir_opt_cse);
+   NIR_PASS_V(nir, nir_opt_gcm, true);
+   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
+   NIR_PASS_V(nir, nir_opt_dce);
+
+   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
+   NIR_PASS_V(nir, nir_split_var_copies);
+   NIR_PASS_V(nir, nir_split_per_member_structs);
+
+   struct brw_compiler *compiler = device->physical->compiler;
+   struct brw_nir_compiler_opts opts = {};
+   brw_preprocess_nir(compiler, nir, &opts);
+   NIR_PASS_V(nir, nir_propagate_invariant, false);
+   NIR_PASS_V(nir, nir_lower_input_attachments,
+              &(nir_input_attachment_options) {
+                 .use_fragcoord_sysval = true,
+                 .use_layer_id_sysval = true,
+              });
+
+   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+   /* Do vectorizing here. For some reason when trying to do it in the back
+    * this just isn't working.
+    */
+   nir_load_store_vectorize_options options = {
+      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
+      .callback = brw_nir_should_vectorize_mem,
+      .robust_modes = (nir_variable_mode)0,
+   };
+   NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);
+
+   NIR_PASS_V(nir, lower_vulkan_descriptors);
+   NIR_PASS_V(nir, nir_opt_dce);
+
+   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
+              nir_address_format_32bit_index_offset);
+   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
+              nir_address_format_64bit_global_32bit_offset);
+
+   NIR_PASS_V(nir, nir_copy_prop);
+   NIR_PASS_V(nir, nir_opt_constant_folding);
+   NIR_PASS_V(nir, nir_opt_dce);
+
+   struct brw_wm_prog_key wm_key;
+   memset(&wm_key, 0, sizeof(wm_key));
+   struct brw_wm_prog_data wm_prog_data = {
+      .base.nr_params = nir->num_uniforms / 4,
+   };
+
+   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, wm_prog_data.base.ubo_ranges);
+
+   struct brw_compile_stats stats[3];
+   struct brw_compile_fs_params params = {
+      .nir = nir,
+      .key = &wm_key,
+      .prog_data = &wm_prog_data,
+      .stats = stats,
+      .log_data = device,
+      .debug_flag = DEBUG_WM,
+   };
+   const unsigned *program = brw_compile_fs(compiler, nir, &params);
+   if (program == NULL) {
+      /* Compilation failed: there is nothing to check or upload. */
+      ralloc_free(nir);
+      return NULL;
+   }
+
+   unsigned stat_idx = 0;
+   if (wm_prog_data.dispatch_8) {
+      assert(stats[stat_idx].spills == 0);
+      assert(stats[stat_idx].fills == 0);
+      assert(stats[stat_idx].sends == sends_count_expectation);
+      stat_idx++;
+   }
+   if (wm_prog_data.dispatch_16) {
+      assert(stats[stat_idx].spills == 0);
+      assert(stats[stat_idx].fills == 0);
+      assert(stats[stat_idx].sends == sends_count_expectation);
+      stat_idx++;
+   }
+   if (wm_prog_data.dispatch_32) {
+      assert(stats[stat_idx].spills == 0);
+      assert(stats[stat_idx].fills == 0);
+      assert(stats[stat_idx].sends == sends_count_expectation * 2);
+      stat_idx++;
+   }
+
+   struct anv_pipeline_bind_map bind_map;
+   memset(&bind_map, 0, sizeof(bind_map));
+   struct anv_push_descriptor_info push_desc_info = {};
+
+   struct anv_shader_bin *kernel =
+      anv_device_upload_kernel(device, device->internal_cache,
+                               nir->info.stage, key, key_size,
+                               program, wm_prog_data.base.program_size,
+                               &wm_prog_data.base, sizeof(wm_prog_data),
+                               NULL, 0, NULL, &bind_map, &push_desc_info);
+
+   ralloc_free(nir);
+   return kernel;
+}
+
+/* Pick the L3 configuration used by the draw generation shader and look up,
+ * or compile and upload, that shader in the device's internal cache.
+ */
+VkResult
+anv_device_init_generated_indirect_draws(struct anv_device *device)
+{
+   const struct intel_l3_weights l3_weights =
+      intel_get_default_l3_weights(device->info,
+                                   true /* wants_dc_cache */,
+                                   false /* needs_slm */);
+   device->generated_draw_l3_config =
+      intel_get_l3_config(device->info, l3_weights);
+
+   /* Cache key: the fixed-size, zero-padded name of the kernel. */
+   struct {
+      char name[40];
+   } cache_key = {
+      .name = "anv-generated-indirect-draws",
+   };
+
+   struct anv_shader_bin *kernel =
+      anv_device_search_for_kernel(device, device->internal_cache,
+                                   &cache_key, sizeof(cache_key), NULL);
+   if (kernel == NULL) {
+      /* Cache miss: compile the variant matching the hardware generation. */
+      const uint32_t *spirv;
+      uint32_t spirv_size, sends;
+      if (device->info->ver >= 11) {
+         spirv = gfx11_generated_draws_spv_source;
+         spirv_size = ARRAY_SIZE(gfx11_generated_draws_spv_source);
+         sends = 11; /* 2 * (2 loads + 3 stores) + 1 store */
+      } else {
+         spirv = gfx9_generated_draws_spv_source;
+         spirv_size = ARRAY_SIZE(gfx9_generated_draws_spv_source);
+         sends = 17; /* 2 * (2 loads + 6 stores) + 1 store */
+      }
+      kernel = compile_upload_spirv(device, &cache_key, sizeof(cache_key),
+                                    spirv, spirv_size, sends);
+   }
+
+   device->generated_draw_kernel = kernel;
+   if (kernel == NULL)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* The cache already has a reference and it's not going anywhere so there
+    * is no need to hold a second reference.
+    */
+   anv_shader_bin_unref(device, kernel);
+   return VK_SUCCESS;
+}
+
+/* Nothing to release: the init function above keeps no reference of its own. */
+void
+anv_device_finish_generated_indirect_draws(struct anv_device *device)
+{
+}
diff --git a/lib/mesa/src/intel/vulkan/anv_generated_indirect_draws.h b/lib/mesa/src/intel/vulkan/anv_generated_indirect_draws.h
new file mode 100644
index 000000000..e8ab8553a
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/anv_generated_indirect_draws.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef ANV_GENERATED_INDIRECT_DRAWS_H
+#define ANV_GENERATED_INDIRECT_DRAWS_H
+
+#include <stdint.h>
+
+#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0)
+#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1)
+#define ANV_GENERATED_FLAG_DRAWID BITFIELD_BIT(2)
+#define ANV_GENERATED_FLAG_BASE BITFIELD_BIT(3)
+
+/* This needs to match common_generated_draws.glsl:
+ * layout(set = 0, binding = 2) uniform block
+ * NOTE(review): lower_vulkan_descriptors_instr() lowers binding 3, not 2, to
+ * the UBO -- confirm which binding number is current. */
+struct anv_generated_indirect_draw_params {
+ /* Draw ID buffer address (only used on Gfx9) */
+ uint64_t draw_id_addr;
+ /* Indirect data buffer address (only used on Gfx9) */
+ uint64_t indirect_data_addr;
+ /* Stride between elements of the indirect data buffer */
+ uint32_t indirect_data_stride;
+ uint32_t flags; /* 0-7: bits, 8-15: mocs, 16-23: cmd_dws */
+ /* Base number of the draw ID; it is added to the index computed from
+ * gl_FragCoord
+ */
+ uint32_t draw_base;
+
+ /* Number of draws to generate */
+ uint32_t draw_count;
+
+ /* Maximum number of draws (equal to draw_count for indirect draws without
+ * an indirect count)
+ */
+ uint32_t max_draw_count;
+
+ /* Instance multiplier for multi view */
+ uint32_t instance_multiplier;
+
+ /* Address to jump to after the generated draws (only used with indirect
+ * draw count variants)
+ */
+ uint64_t end_addr;
+};
+
+struct anv_generated_indirect_params {
+ struct anv_generated_indirect_draw_params draw;
+
+ /* Global address of binding 0 */
+ uint64_t indirect_data_addr;
+
+ /* Global address of binding 1 */
+ uint64_t generated_cmds_addr;
+
+ /* Global address of binding 2 */
+ uint64_t draw_ids_addr;
+
+ /* CPU side pointer to the previous item when the number of draws has to
+ * be split into smaller chunks; see the while loop in
+ * genX(cmd_buffer_emit_indirect_generated_draws)
+ */
+ struct anv_generated_indirect_params *prev;
+};
+
+#endif /* ANV_GENERATED_INDIRECT_DRAWS_H */
diff --git a/lib/mesa/src/intel/vulkan/anv_kmd_backend.c b/lib/mesa/src/intel/vulkan/anv_kmd_backend.c
new file mode 100644
index 000000000..8ce882bba
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/anv_kmd_backend.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2023 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+
+#include "anv_kmd_backend.h"
+#include "anv_private.h"
+
+/* Return the backend function table for a KMD type, NULL for any other. */
+const struct anv_kmd_backend *
+anv_kmd_backend_get(enum intel_kmd_type type)
+{
+   if (type == INTEL_KMD_TYPE_I915)
+      return anv_i915_kmd_backend_get();
+
+   if (type == INTEL_KMD_TYPE_XE)
+      return anv_xe_kmd_backend_get();
+
+   if (type == INTEL_KMD_TYPE_STUB)
+      return anv_stub_kmd_backend_get();
+   return NULL;
+}
diff --git a/lib/mesa/src/intel/vulkan/anv_kmd_backend.h b/lib/mesa/src/intel/vulkan/anv_kmd_backend.h
new file mode 100644
index 000000000..76c5f2f27
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/anv_kmd_backend.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright © 2023 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include "vulkan/vulkan_core.h"
+#include "vk_sync.h"
+
+#include "dev/intel_device_info.h"
+#include "dev/intel_kmd.h"
+
+struct anv_bo;
+enum anv_bo_alloc_flags;
+struct anv_cmd_buffer;
+struct anv_device;
+struct anv_queue;
+struct anv_query_pool;
+struct anv_utrace_submit;
+
+struct anv_kmd_backend {
+ /*
+ * Create a gem buffer.
+ * Returns the gem handle on success, 0 otherwise.
+ */
+ uint32_t (*gem_create)(struct anv_device *device,
+ const struct intel_memory_class_instance **regions,
+ uint16_t num_regions, uint64_t size,
+ enum anv_bo_alloc_flags alloc_flags,
+ uint64_t *actual_size);
+ void (*gem_close)(struct anv_device *device, uint32_t handle);
+ /* Returns MAP_FAILED on error */
+ void *(*gem_mmap)(struct anv_device *device, struct anv_bo *bo,
+ uint64_t offset, uint64_t size,
+ VkMemoryPropertyFlags property_flags);
+ int (*gem_vm_bind)(struct anv_device *device, struct anv_bo *bo);
+ int (*gem_vm_unbind)(struct anv_device *device, struct anv_bo *bo);
+ VkResult (*execute_simple_batch)(struct anv_queue *queue,
+ struct anv_bo *batch_bo,
+ uint32_t batch_bo_size);
+ VkResult (*queue_exec_locked)(struct anv_queue *queue,
+ uint32_t wait_count,
+ const struct vk_sync_wait *waits,
+ uint32_t cmd_buffer_count,
+ struct anv_cmd_buffer **cmd_buffers,
+ uint32_t signal_count,
+ const struct vk_sync_signal *signals,
+ struct anv_query_pool *perf_query_pool,
+ uint32_t perf_query_pass);
+ VkResult (*queue_exec_trace)(struct anv_queue *queue,
+ struct anv_utrace_submit *submit);
+};
+
+const struct anv_kmd_backend *anv_kmd_backend_get(enum intel_kmd_type type);
+
+/* Internal functions, should only be called by anv_kmd_backend_get() */
+const struct anv_kmd_backend *anv_i915_kmd_backend_get(void);
+const struct anv_kmd_backend *anv_xe_kmd_backend_get(void);
+const struct anv_kmd_backend *anv_stub_kmd_backend_get(void);
diff --git a/lib/mesa/src/intel/vulkan/anv_mesh_perprim_wa.c b/lib/mesa/src/intel/vulkan/anv_mesh_perprim_wa.c
new file mode 100644
index 000000000..f7346b6dc
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/anv_mesh_perprim_wa.c
@@ -0,0 +1,557 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_private.h"
+#include "nir_builder.h"
+
+/*
+ * Wa_14015590813 for gfx 12.5.
+ *
+ * This file implements a workaround for a HW bug which leads to the fragment
+ * shader reading incorrect per-primitive data if the mesh shader, in addition
+ * to writing per-primitive data, also writes to gl_ClipDistance.
+ *
+ * The suggested solution to that bug is to not use per-primitive data by:
+ * - creating new vertices for provoking vertices shared by multiple primitives
+ * - converting per-primitive attributes read by fragment shader to flat
+ * per-vertex attributes for the provoking vertex
+ * - modifying fragment shader to read those per-vertex attributes
+ *
+ * There are at least 2 types of failures not handled very well:
+ * - if the number of varying slots overflows, then only some attributes will
+ * be converted, leading to corruption of those unconverted attributes
+ * - if the overall MUE size is so large it doesn't fit in URB, then URB
+ * allocation will fail in some way; unfortunately there's no good way to
+ * say how big MUE will be at this moment and back out
+ *
+ * This workaround needs to be applied before linking, so that unused outputs
+ * created by this code are removed at link time.
+ *
+ * This workaround can be controlled by a driconf option to either disable it,
+ * lower its scope or force enable it.
+ *
+ * Option "anv_mesh_conv_prim_attrs_to_vert_attrs" is evaluated like this:
+ * value == 0 - disable workaround
+ * value < 0 - enable ONLY if workaround is required
+ * value > 0 - enable ALWAYS, even if it's not required
+ * abs(value) >= 1 - attribute conversion
+ * abs(value) >= 2 - attribute conversion and vertex duplication
+ *
+ * Default: -2 (both parts of the work around, ONLY if it's required)
+ *
+ */
+
+/*
+ * Mesh shader half of the per-primitive attribute workaround.
+ *
+ * Picks a free generic per-vertex slot (VARYING_SLOT_VAR0..VAR31) for every
+ * per-primitive output that the fragment shader reads, then appends code to
+ * the end of the mesh shader entrypoint (executed by local invocation 0
+ * after a workgroup barrier) which copies each per-primitive value into the
+ * new flat per-vertex output of the primitive's provoking vertex. When
+ * dup_vertices is set, a provoking vertex already used by an earlier
+ * primitive is duplicated first so that no two primitives share one.
+ *
+ * nir              - mesh shader to modify
+ * wa_mapping       - indexed by per-primitive slot; receives the per-vertex
+ *                    slot chosen for it. 0 means "not remapped", so the
+ *                    array has to be zero-initialized by the caller.
+ * fs_inputs        - bitmask of locations read by the fragment shader
+ * pCreateInfo      - used to look up the provoking vertex mode
+ * mem_ctx          - ralloc context for a temporary array
+ * dup_vertices     - also duplicate shared provoking vertices
+ * force_conversion - convert even if the shader does not write clip
+ *                    distances / nothing was remapped
+ *
+ * Returns true if the shader was modified.
+ */
+static bool
+anv_mesh_convert_attrs_prim_to_vert(struct nir_shader *nir,
+                                    gl_varying_slot *wa_mapping,
+                                    uint64_t fs_inputs,
+                                    const VkGraphicsPipelineCreateInfo *pCreateInfo,
+                                    void *mem_ctx,
+                                    const bool dup_vertices,
+                                    const bool force_conversion)
+{
+   uint64_t per_primitive_outputs = nir->info.per_primitive_outputs;
+   per_primitive_outputs &= ~BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES);
+
+   if (per_primitive_outputs == 0)
+      return false;
+
+   uint64_t outputs_written = nir->info.outputs_written;
+   uint64_t other_outputs = outputs_written & ~per_primitive_outputs;
+
+   /* Without clip distance writes the workaround is applied only on request. */
+   if ((other_outputs & (VARYING_BIT_CLIP_DIST0 | VARYING_BIT_CLIP_DIST1)) == 0)
+      if (!force_conversion)
+         return false;
+
+   uint64_t all_outputs = outputs_written;
+   unsigned attrs = 0;
+
+   uint64_t remapped_outputs = outputs_written & per_primitive_outputs;
+   remapped_outputs &= ~BITFIELD64_BIT(VARYING_SLOT_CULL_PRIMITIVE);
+
+   /* Skip locations not read by the fragment shader, because they will
+    * be eliminated at linking time. Note that some fs inputs may be
+    * removed only after optimizations, so it's possible that we will
+    * create too many variables.
+    */
+   remapped_outputs &= fs_inputs;
+
+   /* Figure out the mapping between per-primitive and new per-vertex outputs. */
+   nir_foreach_shader_out_variable(var, nir) {
+      int location = var->data.location;
+
+      if (!(BITFIELD64_BIT(location) & remapped_outputs))
+         continue;
+
+      /* Although primitive shading rate, layer and viewport have predefined
+       * place in MUE Primitive Header (so we can't really move them anywhere),
+       * we have to copy them to per-vertex space if fragment shader reads them.
+       */
+      assert(location == VARYING_SLOT_PRIMITIVE_SHADING_RATE ||
+             location == VARYING_SLOT_LAYER ||
+             location == VARYING_SLOT_VIEWPORT ||
+             location == VARYING_SLOT_PRIMITIVE_ID ||
+             location >= VARYING_SLOT_VAR0);
+
+      const struct glsl_type *type = var->type;
+      if (nir_is_arrayed_io(var, MESA_SHADER_MESH) || var->data.per_view) {
+         assert(glsl_type_is_array(type));
+         type = glsl_get_array_element(type);
+      }
+
+      unsigned num_slots = glsl_count_attribute_slots(type, false);
+
+      /* Only consider start slots for which the whole range
+       * [slot, slot + num_slots) lies within VAR0..VAR31. Otherwise the part
+       * of the mask beyond VARYING_SLOT_VAR31 would not be checked against
+       * generic slots and a multi-slot variable could be placed at the end
+       * of the range without room for all of its slots.
+       */
+      for (gl_varying_slot slot = VARYING_SLOT_VAR0;
+           slot + num_slots - 1 <= VARYING_SLOT_VAR31; slot++) {
+         uint64_t mask = BITFIELD64_MASK(num_slots) << slot;
+         if ((all_outputs & mask) == 0) {
+            wa_mapping[location] = slot;
+            all_outputs |= mask;
+            attrs++;
+            break;
+         }
+      }
+
+      /* 0 is never a valid target (targets are >= VAR0): no slot was found. */
+      if (wa_mapping[location] == 0) {
+         fprintf(stderr, "Not enough space for hardware per-primitive data corruption work around.\n");
+         break;
+      }
+   }
+
+   if (attrs == 0)
+      if (!force_conversion)
+         return false;
+
+   /* Component of the primitive's index vector that is the provoking vertex. */
+   unsigned provoking_vertex = 0;
+
+   const VkPipelineRasterizationStateCreateInfo *rs_info = pCreateInfo->pRasterizationState;
+   const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *rs_pv_info =
+      vk_find_struct_const(rs_info, PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT);
+   if (rs_pv_info && rs_pv_info->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT)
+      provoking_vertex = 2;
+
+   unsigned vertices_per_primitive =
+      num_mesh_vertices_per_primitive(nir->info.mesh.primitive_type);
+
+   /* All new code is appended at the very end of the entrypoint. */
+   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   nir_builder b;
+   nir_builder_init(&b, impl);
+   b.cursor = nir_after_cf_list(&impl->body);
+
+   /* wait for all subgroups to finish */
+   nir_scoped_barrier(&b, NIR_SCOPE_WORKGROUP);
+
+   nir_ssa_def *zero = nir_imm_int(&b, 0);
+
+   nir_ssa_def *local_invocation_index = nir_build_load_local_invocation_index(&b);
+
+   /* The fix-up loop below is run by local invocation 0 only. */
+   nir_ssa_def *cmp = nir_ieq(&b, local_invocation_index, zero);
+   nir_if *if_stmt = nir_push_if(&b, cmp);
+   {
+      nir_variable *primitive_count_var = NULL;
+      nir_variable *primitive_indices_var = NULL;
+
+      /* Upper bound for the number of per-vertex output variables. */
+      unsigned num_other_variables = 0;
+      nir_foreach_shader_out_variable(var, b.shader) {
+         if ((BITFIELD64_BIT(var->data.location) & other_outputs) == 0)
+            continue;
+         num_other_variables++;
+      }
+
+      nir_deref_instr **per_vertex_derefs =
+         ralloc_array(mem_ctx, nir_deref_instr *, num_other_variables);
+
+      unsigned num_per_vertex_variables = 0;
+
+      unsigned processed = 0;
+      nir_foreach_shader_out_variable(var, b.shader) {
+         if ((BITFIELD64_BIT(var->data.location) & other_outputs) == 0)
+            continue;
+
+         switch (var->data.location) {
+         case VARYING_SLOT_PRIMITIVE_COUNT:
+            primitive_count_var = var;
+            break;
+         case VARYING_SLOT_PRIMITIVE_INDICES:
+            primitive_indices_var = var;
+            break;
+         default: {
+            const struct glsl_type *type = var->type;
+            assert(glsl_type_is_array(type));
+            const struct glsl_type *array_element_type =
+               glsl_get_array_element(type);
+
+            if (dup_vertices) {
+               /*
+                * Resize type of array output to make space for one extra
+                * vertex attribute for each primitive, so we ensure that
+                * the provoking vertex is not shared between primitives.
+                */
+               const struct glsl_type *new_type =
+                  glsl_array_type(array_element_type,
+                                  glsl_get_length(type) +
+                                  nir->info.mesh.max_primitives_out,
+                                  0);
+
+               var->type = new_type;
+            }
+
+            per_vertex_derefs[num_per_vertex_variables++] =
+               nir_build_deref_var(&b, var);
+            break;
+         }
+         }
+
+         ++processed;
+      }
+      assert(processed == num_other_variables);
+
+      assert(primitive_count_var != NULL);
+      assert(primitive_indices_var != NULL);
+
+      /* Update types of derefs to match type of variables they (de)reference. */
+      if (dup_vertices) {
+         nir_foreach_function(function, b.shader) {
+            if (!function->impl)
+               continue;
+
+            nir_foreach_block(block, function->impl) {
+               nir_foreach_instr(instr, block) {
+                  if (instr->type != nir_instr_type_deref)
+                     continue;
+
+                  nir_deref_instr *deref = nir_instr_as_deref(instr);
+                  if (deref->deref_type != nir_deref_type_var)
+                     continue;
+
+                  if (deref->var->type != deref->type)
+                     deref->type = deref->var->type;
+               }
+            }
+         }
+      }
+
+      /* indexed by slot of per-prim attribute */
+      struct {
+         nir_deref_instr *per_prim_deref;
+         nir_deref_instr *per_vert_deref;
+      } mapping[VARYING_SLOT_MAX] = {{NULL, NULL}, };
+
+      /* Create new per-vertex output variables mirroring per-primitive variables
+       * and create derefs for both old and new variables.
+       */
+      nir_foreach_shader_out_variable(var, b.shader) {
+         gl_varying_slot location = var->data.location;
+
+         if ((BITFIELD64_BIT(location) & (outputs_written & per_primitive_outputs)) == 0)
+            continue;
+         if (wa_mapping[location] == 0)
+            continue;
+
+         const struct glsl_type *type = var->type;
+         assert(glsl_type_is_array(type));
+         const struct glsl_type *array_element_type = glsl_get_array_element(type);
+
+         const struct glsl_type *new_type =
+            glsl_array_type(array_element_type,
+                            nir->info.mesh.max_vertices_out +
+                            (dup_vertices ? nir->info.mesh.max_primitives_out : 0),
+                            0);
+
+         nir_variable *new_var =
+            nir_variable_create(b.shader, nir_var_shader_out, new_type, var->name);
+         assert(wa_mapping[location] >= VARYING_SLOT_VAR0);
+         assert(wa_mapping[location] <= VARYING_SLOT_VAR31);
+         new_var->data.location = wa_mapping[location];
+         new_var->data.interpolation = INTERP_MODE_FLAT;
+
+         mapping[location].per_vert_deref = nir_build_deref_var(&b, new_var);
+         mapping[location].per_prim_deref = nir_build_deref_var(&b, var);
+      }
+
+      nir_ssa_def *trueconst = nir_imm_true(&b);
+
+      /*
+       * for each Primitive (0 : primitiveCount)
+       *    if VertexUsed[PrimitiveIndices[Primitive][provoking vertex]]
+       *       create 1 new vertex at offset "Vertex"
+       *       copy per vert attributes of provoking vertex to the new one
+       *       update PrimitiveIndices[Primitive][provoking vertex]
+       *       Vertex++
+       *    else
+       *       VertexUsed[PrimitiveIndices[Primitive][provoking vertex]] := true
+       *
+       *    for each attribute : mapping
+       *       copy per_prim_attr(Primitive) to per_vert_attr[Primitive][provoking vertex]
+       */
+
+      /* primitive count */
+      nir_ssa_def *primitive_count = nir_load_var(&b, primitive_count_var);
+
+      /* primitive index */
+      nir_variable *primitive_var =
+         nir_local_variable_create(impl, glsl_uint_type(), "Primitive");
+      nir_deref_instr *primitive_deref = nir_build_deref_var(&b, primitive_var);
+      nir_store_deref(&b, primitive_deref, zero, 1);
+
+      /* vertex index; duplicated vertices are appended after the original ones */
+      nir_variable *vertex_var =
+         nir_local_variable_create(impl, glsl_uint_type(), "Vertex");
+      nir_deref_instr *vertex_deref = nir_build_deref_var(&b, vertex_var);
+      nir_store_deref(&b, vertex_deref, nir_imm_int(&b, nir->info.mesh.max_vertices_out), 1);
+
+      /* used vertices bitvector */
+      const struct glsl_type *used_vertex_type =
+         glsl_array_type(glsl_bool_type(),
+                         nir->info.mesh.max_vertices_out,
+                         0);
+      nir_variable *used_vertex_var =
+         nir_local_variable_create(impl, used_vertex_type, "VertexUsed");
+      nir_deref_instr *used_vertex_deref =
+         nir_build_deref_var(&b, used_vertex_var);
+      /* Initialize it as "not used" */
+      for (unsigned i = 0; i < nir->info.mesh.max_vertices_out; ++i) {
+         nir_deref_instr *indexed_used_vertex_deref =
+            nir_build_deref_array(&b, used_vertex_deref, nir_imm_int(&b, i));
+         nir_store_deref(&b, indexed_used_vertex_deref, nir_imm_false(&b), 1);
+      }
+
+      nir_loop *loop = nir_push_loop(&b);
+      {
+         /* Leave the loop once every emitted primitive was processed. */
+         nir_ssa_def *primitive = nir_load_deref(&b, primitive_deref);
+         nir_ssa_def *cmp = nir_ige(&b, primitive, primitive_count);
+
+         nir_if *loop_check = nir_push_if(&b, cmp);
+         nir_jump(&b, nir_jump_break);
+         nir_pop_if(&b, loop_check);
+
+         nir_deref_instr *primitive_indices_deref =
+            nir_build_deref_var(&b, primitive_indices_var);
+         nir_deref_instr *indexed_primitive_indices_deref;
+         nir_ssa_def *src_vertex;
+         nir_ssa_def *prim_indices;
+
+         if (nir->info.mesh.nv) {
+            /* flat array, but we can deref each index directly */
+            nir_ssa_def *index_index =
+               nir_imul(&b, primitive, nir_imm_int(&b, vertices_per_primitive));
+            index_index = nir_iadd(&b, index_index, nir_imm_int(&b, provoking_vertex));
+            indexed_primitive_indices_deref = nir_build_deref_array(&b, primitive_indices_deref, index_index);
+            src_vertex = nir_load_deref(&b, indexed_primitive_indices_deref);
+            prim_indices = NULL;
+         } else {
+            /* array of vectors, we have to extract index out of array deref */
+            indexed_primitive_indices_deref = nir_build_deref_array(&b, primitive_indices_deref, primitive);
+            prim_indices = nir_load_deref(&b, indexed_primitive_indices_deref);
+            src_vertex = nir_channel(&b, prim_indices, provoking_vertex);
+         }
+
+         nir_ssa_def *dst_vertex = nir_load_deref(&b, vertex_deref);
+
+         nir_deref_instr *indexed_used_vertex_deref =
+            nir_build_deref_array(&b, used_vertex_deref, src_vertex);
+         nir_ssa_def *used_vertex = nir_load_deref(&b, indexed_used_vertex_deref);
+         /* Without vertex duplication always take the "not used yet" path. */
+         if (!dup_vertices)
+            used_vertex = nir_imm_false(&b);
+
+         nir_if *vertex_used_check = nir_push_if(&b, used_vertex);
+         {
+            /* Provoking vertex is shared: clone it into a new vertex. */
+            for (unsigned a = 0; a < num_per_vertex_variables; ++a) {
+               nir_deref_instr *attr_arr = per_vertex_derefs[a];
+               nir_deref_instr *src = nir_build_deref_array(&b, attr_arr, src_vertex);
+               nir_deref_instr *dst = nir_build_deref_array(&b, attr_arr, dst_vertex);
+
+               nir_copy_deref(&b, dst, src);
+            }
+
+            if (nir->info.mesh.nv) {
+               nir_store_deref(&b, indexed_primitive_indices_deref, dst_vertex, 1);
+            } else {
+               /* replace one component of primitive indices vector */
+               nir_ssa_def *new_val =
+                  nir_vector_insert_imm(&b, prim_indices, dst_vertex, provoking_vertex);
+
+               /* and store complete vector */
+               nir_store_deref(&b, indexed_primitive_indices_deref, new_val,
+                               BITFIELD_MASK(vertices_per_primitive));
+            }
+
+            nir_store_deref(&b, vertex_deref, nir_iadd_imm(&b, dst_vertex, 1), 1);
+
+            /* Per-primitive values go to the freshly created vertex. */
+            for (unsigned i = 0; i < ARRAY_SIZE(mapping); ++i) {
+               if (!mapping[i].per_vert_deref)
+                  continue;
+
+               nir_deref_instr *src =
+                  nir_build_deref_array(&b, mapping[i].per_prim_deref, primitive);
+               nir_deref_instr *dst =
+                  nir_build_deref_array(&b, mapping[i].per_vert_deref, dst_vertex);
+
+               nir_copy_deref(&b, dst, src);
+            }
+         }
+         nir_push_else(&b, vertex_used_check);
+         {
+            nir_store_deref(&b, indexed_used_vertex_deref, trueconst, 1);
+
+            /* Per-primitive values go to the original provoking vertex. */
+            for (unsigned i = 0; i < ARRAY_SIZE(mapping); ++i) {
+               if (!mapping[i].per_vert_deref)
+                  continue;
+
+               nir_deref_instr *src =
+                  nir_build_deref_array(&b, mapping[i].per_prim_deref, primitive);
+               nir_deref_instr *dst =
+                  nir_build_deref_array(&b, mapping[i].per_vert_deref, src_vertex);
+
+               nir_copy_deref(&b, dst, src);
+            }
+
+         }
+         nir_pop_if(&b, vertex_used_check);
+
+         nir_store_deref(&b, primitive_deref, nir_iadd_imm(&b, primitive, 1), 1);
+      }
+      nir_pop_loop(&b, loop);
+   }
+   nir_pop_if(&b, if_stmt); /* local_invocation_index == 0 */
+
+   /* Account for the (at most one per primitive) duplicated vertices. */
+   if (dup_vertices)
+      nir->info.mesh.max_vertices_out += nir->info.mesh.max_primitives_out;
+
+   if (should_print_nir(nir)) {
+      printf("%s\n", __func__);
+      nir_print_shader(nir, stdout);
+   }
+
+   /* deal with copy_derefs */
+   NIR_PASS(_, nir, nir_split_var_copies);
+   NIR_PASS(_, nir, nir_lower_var_copies);
+
+   nir_shader_gather_info(nir, impl);
+
+   return true;
+}
+
+/*
+ * Per-instruction callback for anv_frag_update_derefs().
+ *
+ * "data" is an array of replacement variable derefs indexed by the location
+ * of the original input variable. A variable deref of a shader input whose
+ * location has a non-NULL entry is removed and all of its uses are rewritten
+ * to the replacement. Returns true if the instruction was replaced.
+ */
+static bool
+anv_frag_update_derefs_instr(struct nir_builder *b, nir_instr *instr, void *data)
+{
+   if (instr->type != nir_instr_type_deref)
+      return false;
+
+   nir_deref_instr *old_deref = nir_instr_as_deref(instr);
+
+   /* Only variable derefs of shader inputs are candidates. */
+   if (old_deref->deref_type != nir_deref_type_var ||
+       !(old_deref->var->data.mode & nir_var_shader_in))
+      return false;
+
+   nir_deref_instr **replacements = (nir_deref_instr **)data;
+   nir_deref_instr *replacement = replacements[old_deref->var->data.location];
+   if (replacement == NULL)
+      return false;
+
+   assert(old_deref->dest.is_ssa);
+   assert(replacement->dest.is_ssa);
+
+   nir_instr_remove(&old_deref->instr);
+   nir_ssa_def_rewrite_uses(&old_deref->dest.ssa, &replacement->dest.ssa);
+
+   return true;
+}
+
+/*
+ * Run anv_frag_update_derefs_instr() over every instruction of the shader,
+ * passing "mapping" as its data pointer. No metadata is declared preserved.
+ */
+static bool
+anv_frag_update_derefs(nir_shader *shader, nir_deref_instr **mapping)
+{
+   void *pass_data = mapping;
+
+   return nir_shader_instructions_pass(shader, anv_frag_update_derefs_instr,
+                                       nir_metadata_none, pass_data);
+}
+
+/*
+ * Fragment shader half of the workaround: for every input whose location has
+ * a non-zero entry in wa_mapping, create a flat input at the mapped location
+ * and redirect all derefs of the old input to it.
+ */
+static void
+anv_frag_convert_attrs_prim_to_vert(struct nir_shader *nir,
+                                    gl_varying_slot *wa_mapping)
+{
+   nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
+
+   /* Replacement derefs are emitted at the very top of the entrypoint. */
+   nir_builder builder;
+   nir_builder_init(&builder, entrypoint);
+   builder.cursor = nir_before_cf_list(&entrypoint->body);
+
+   /* indexed by slot of per-prim attribute */
+   nir_deref_instr *replacements[VARYING_SLOT_MAX] = {NULL, };
+
+   nir_foreach_shader_in_variable_safe(var, nir) {
+      const gl_varying_slot old_slot = var->data.location;
+      const gl_varying_slot new_slot = wa_mapping[old_slot];
+
+      if (new_slot != 0) {
+         /* A remap target must not itself be remapped. */
+         assert(wa_mapping[new_slot] == 0);
+
+         nir_variable *flat_var =
+            nir_variable_create(builder.shader, nir_var_shader_in,
+                                var->type, var->name);
+         flat_var->data.location = new_slot;
+         flat_var->data.location_frac = var->data.location_frac;
+         flat_var->data.interpolation = INTERP_MODE_FLAT;
+
+         replacements[old_slot] = nir_build_deref_var(&builder, flat_var);
+      }
+   }
+
+   NIR_PASS(_, nir, anv_frag_update_derefs, replacements);
+
+   nir_shader_gather_info(nir, entrypoint);
+}
+
+/*
+ * Entry point of the per-primitive attribute workaround.
+ *
+ * Reads the instance's mesh_conv_prim_attrs_to_vert_attrs setting (see the
+ * comment at the top of this file): 0 disables the workaround, a negative
+ * value applies it only when Wa_14015590813 is needed on this device, and an
+ * absolute value of 2 or more also enables vertex duplication. The fragment
+ * shader is rewritten only if the mesh shader was actually modified.
+ */
+void
+anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
+                           struct nir_shader *fs_nir,
+                           struct anv_device *device,
+                           const VkGraphicsPipelineCreateInfo *info)
+{
+   const struct intel_device_info *devinfo = device->info;
+   const int wa_mode =
+      device->physical->instance->mesh_conv_prim_attrs_to_vert_attrs;
+
+   if (wa_mode == 0)
+      return;
+
+   /* Negative values mean "only if the hardware requires it". */
+   if (wa_mode < 0 && !intel_needs_workaround(devinfo, 14015590813))
+      return;
+
+   const bool force_conversion = wa_mode > 0;
+   const bool dup_vertices = abs(wa_mode) >= 2;
+
+   /* Collect the locations of all fragment shader inputs. */
+   uint64_t fs_inputs = 0;
+   nir_foreach_shader_in_variable(var, fs_nir)
+      fs_inputs |= BITFIELD64_BIT(var->data.location);
+
+   gl_varying_slot wa_mapping[VARYING_SLOT_MAX] = { 0, };
+
+   void *stage_ctx = ralloc_context(NULL);
+
+   const bool mesh_changed =
+      anv_mesh_convert_attrs_prim_to_vert(ms_nir, wa_mapping,
+                                          fs_inputs, info, stage_ctx,
+                                          dup_vertices, force_conversion);
+   if (mesh_changed)
+      anv_frag_convert_attrs_prim_to_vert(fs_nir, wa_mapping);
+
+   ralloc_free(stage_ctx);
+}
diff --git a/lib/mesa/src/intel/vulkan/anv_nir_compute_push_layout.c b/lib/mesa/src/intel/vulkan/anv_nir_compute_push_layout.c
index 22478e7e3..1d4b8009e 100644
--- a/lib/mesa/src/intel/vulkan/anv_nir_compute_push_layout.c
+++ b/lib/mesa/src/intel/vulkan/anv_nir_compute_push_layout.c
@@ -67,12 +67,13 @@ anv_nir_compute_push_layout(nir_shader *nir,
break;
}
- case nir_intrinsic_load_desc_set_address_intel:
- push_start = MIN2(push_start,
- offsetof(struct anv_push_constants, desc_sets));
- push_end = MAX2(push_end, push_start +
+ case nir_intrinsic_load_desc_set_address_intel: {
+ unsigned base = offsetof(struct anv_push_constants, desc_sets);
+ push_start = MIN2(push_start, base);
+ push_end = MAX2(push_end, base +
sizeof_field(struct anv_push_constants, desc_sets));
break;
+ }
default:
break;
@@ -117,7 +118,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
* push_end (no push constants is indicated by push_start = UINT_MAX).
*/
push_start = MIN2(push_start, push_end);
- push_start = align_down_u32(push_start, 32);
+ push_start = ROUND_DOWN_TO(push_start, 32);
/* For vec4 our push data size needs to be aligned to a vec4 and for
* scalar, it needs to be aligned to a DWORD.
diff --git a/lib/mesa/src/intel/vulkan/anv_nir_lower_ubo_loads.c b/lib/mesa/src/intel/vulkan/anv_nir_lower_ubo_loads.c
index 5a170352c..f1609a22c 100644
--- a/lib/mesa/src/intel/vulkan/anv_nir_lower_ubo_loads.c
+++ b/lib/mesa/src/intel/vulkan/anv_nir_lower_ubo_loads.c
@@ -47,7 +47,7 @@ lower_ubo_load_instr(nir_builder *b, nir_instr *instr, UNUSED void *_data)
unsigned byte_size = bit_size / 8;
nir_ssa_def *val;
- if (nir_src_is_const(load->src[1])) {
+ if (!nir_src_is_divergent(load->src[0]) && nir_src_is_const(load->src[1])) {
uint32_t offset = nir_src_as_uint(load->src[1]);
/* Things should be component-aligned. */
diff --git a/lib/mesa/src/intel/vulkan/anv_perf.c b/lib/mesa/src/intel/vulkan/anv_perf.c
index 49cbef52a..3b23067ab 100644
--- a/lib/mesa/src/intel/vulkan/anv_perf.c
+++ b/lib/mesa/src/intel/vulkan/anv_perf.c
@@ -109,7 +109,10 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
properties[p++] = metric_id;
properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
- properties[p++] = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
+ properties[p++] =
+ device->info->verx10 >= 125 ?
+ I915_OA_FORMAT_A24u40_A14u32_B8_C8 :
+ I915_OA_FORMAT_A32u40_A4u32_B8_C8;
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
properties[p++] = 31; /* slowest sampling period */
@@ -363,7 +366,10 @@ VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR, &out_desc, desc) {
desc->flags = 0; /* None so far. */
- snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name);
+ snprintf(desc->name, sizeof(desc->name), "%s",
+ INTEL_DEBUG(DEBUG_PERF_SYMBOL_NAMES) ?
+ intel_counter->symbol_name :
+ intel_counter->name);
snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);
snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);
}
@@ -430,10 +436,12 @@ anv_perf_write_pass_results(struct intel_perf_config *perf,
const struct intel_perf_query_result *accumulated_results,
union VkPerformanceCounterResultKHR *results)
{
+ const struct intel_perf_query_info *query = pool->pass_query[pass];
+
for (uint32_t c = 0; c < pool->n_counters; c++) {
const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];
- if (counter_pass->pass != pass)
+ if (counter_pass->query != query)
continue;
switch (pool->pass_query[pass]->kind) {
diff --git a/lib/mesa/src/intel/vulkan/anv_utrace.c b/lib/mesa/src/intel/vulkan/anv_utrace.c
index 3a35aefe4..99dfc50d4 100644
--- a/lib/mesa/src/intel/vulkan/anv_utrace.c
+++ b/lib/mesa/src/intel/vulkan/anv_utrace.c
@@ -23,15 +23,19 @@
#include "anv_private.h"
+#include "ds/intel_tracepoints.h"
+#include "genxml/gen8_pack.h"
#include "perf/intel_perf.h"
+#include "vulkan/runtime/vk_common_entrypoints.h"
+
static uint32_t
command_buffers_count_utraces(struct anv_device *device,
uint32_t cmd_buffer_count,
struct anv_cmd_buffer **cmd_buffers,
uint32_t *utrace_copies)
{
- if (!u_trace_context_actively_tracing(&device->ds.trace_context))
+ if (!u_trace_should_process(&device->ds.trace_context))
return 0;
uint32_t utraces = 0;
@@ -47,25 +51,25 @@ command_buffers_count_utraces(struct anv_device *device,
}
static void
-anv_utrace_delete_flush_data(struct u_trace_context *utctx,
- void *flush_data)
+anv_utrace_delete_submit(struct u_trace_context *utctx, void *submit_data)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
- struct anv_utrace_flush_copy *flush = flush_data;
+ struct anv_utrace_submit *submit = submit_data;
+
+ intel_ds_flush_data_fini(&submit->ds);
- intel_ds_flush_data_fini(&flush->ds);
+ if (submit->trace_bo)
+ anv_bo_pool_free(&device->utrace_bo_pool, submit->trace_bo);
- if (flush->trace_bo) {
- assert(flush->batch_bo);
- anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
- anv_device_release_bo(device, flush->batch_bo);
- anv_device_release_bo(device, flush->trace_bo);
+ if (submit->batch_bo) {
+ anv_reloc_list_finish(&submit->relocs, &device->vk.alloc);
+ anv_bo_pool_free(&device->utrace_bo_pool, submit->batch_bo);
}
- vk_sync_destroy(&device->vk, flush->sync);
+ vk_sync_destroy(&device->vk, submit->sync);
- vk_free(&device->vk.alloc, flush);
+ vk_free(&device->vk.alloc, submit);
}
static void
@@ -77,13 +81,13 @@ anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx,
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
- struct anv_utrace_flush_copy *flush = cmdstream;
+ struct anv_utrace_submit *submit = cmdstream;
struct anv_address from_addr = (struct anv_address) {
.bo = ts_from, .offset = from_offset * sizeof(uint64_t) };
struct anv_address to_addr = (struct anv_address) {
.bo = ts_to, .offset = to_offset * sizeof(uint64_t) };
- anv_genX(device->info, emit_so_memcpy)(&flush->memcpy_state,
+ anv_genX(device->info, emit_so_memcpy)(&submit->memcpy_state,
to_addr, from_addr, count * sizeof(uint64_t));
}
@@ -91,7 +95,7 @@ VkResult
anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
uint32_t cmd_buffer_count,
struct anv_cmd_buffer **cmd_buffers,
- struct anv_utrace_flush_copy **out_flush_data)
+ struct anv_utrace_submit **out_submit)
{
struct anv_device *device = queue->device;
uint32_t utrace_copies = 0;
@@ -100,94 +104,105 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
cmd_buffers,
&utrace_copies);
if (!utraces) {
- *out_flush_data = NULL;
+ *out_submit = NULL;
return VK_SUCCESS;
}
VkResult result;
- struct anv_utrace_flush_copy *flush =
- vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_flush_copy),
+ struct anv_utrace_submit *submit =
+ vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_submit),
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
- if (!flush)
+ if (!submit)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
- intel_ds_flush_data_init(&flush->ds, &queue->ds, queue->ds.submission_id);
+ intel_ds_flush_data_init(&submit->ds, &queue->ds, queue->ds.submission_id);
result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
- 0, 0, &flush->sync);
+ 0, 0, &submit->sync);
if (result != VK_SUCCESS)
goto error_sync;
if (utrace_copies > 0) {
result = anv_bo_pool_alloc(&device->utrace_bo_pool,
utrace_copies * 4096,
- &flush->trace_bo);
+ &submit->trace_bo);
if (result != VK_SUCCESS)
goto error_trace_buf;
+ uint32_t batch_size = 512; /* 128 dwords of setup */
+ if (device->info->verx10 == 120 || intel_device_info_is_dg2(device->info)) {
+ /* Enable/Disable preemption at the begin/end */
+ batch_size += 2 * (250 /* 250 MI_NOOPs*/ +
+ 6 /* PIPE_CONTROL */ +
+ 3 /* MI_LRI */) * 4 /* dwords */;
+ }
+ batch_size += 256 * utrace_copies; /* 64 dwords per copy */
+ batch_size = align(batch_size + 4, 8); /* MI_BATCH_BUFFER_END */
+
result = anv_bo_pool_alloc(&device->utrace_bo_pool,
- /* 128 dwords of setup + 64 dwords per copy */
- align_u32(512 + 64 * utrace_copies, 4096),
- &flush->batch_bo);
+ align(batch_size, 4096),
+ &submit->batch_bo);
if (result != VK_SUCCESS)
goto error_batch_buf;
- result = anv_reloc_list_init(&flush->relocs, &device->vk.alloc);
+ result = anv_reloc_list_init(&submit->relocs, &device->vk.alloc);
if (result != VK_SUCCESS)
goto error_reloc_list;
- flush->batch.alloc = &device->vk.alloc;
- flush->batch.relocs = &flush->relocs;
- anv_batch_set_storage(&flush->batch,
- (struct anv_address) { .bo = flush->batch_bo, },
- flush->batch_bo->map, flush->batch_bo->size);
+ submit->batch.alloc = &device->vk.alloc;
+ submit->batch.relocs = &submit->relocs;
+ anv_batch_set_storage(&submit->batch,
+ (struct anv_address) { .bo = submit->batch_bo, },
+ submit->batch_bo->map, submit->batch_bo->size);
/* Emit the copies */
- anv_genX(device->info, emit_so_memcpy_init)(&flush->memcpy_state,
- device,
- &flush->batch);
+ anv_genX(device->info, emit_so_memcpy_init)(&submit->memcpy_state,
+ device,
+ &submit->batch);
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
- u_trace_flush(&cmd_buffers[i]->trace, flush, false);
+ u_trace_flush(&cmd_buffers[i]->trace, submit, false);
} else {
u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
u_trace_end_iterator(&cmd_buffers[i]->trace),
- &flush->ds.trace,
- flush,
+ &submit->ds.trace,
+ submit,
anv_device_utrace_emit_copy_ts_buffer);
}
}
- anv_genX(device->info, emit_so_memcpy_fini)(&flush->memcpy_state);
+ anv_genX(device->info, emit_so_memcpy_fini)(&submit->memcpy_state);
+ anv_genX(device->info, emit_so_memcpy_end)(&submit->memcpy_state);
- u_trace_flush(&flush->ds.trace, flush, true);
+ u_trace_flush(&submit->ds.trace, submit, true);
- if (flush->batch.status != VK_SUCCESS) {
- result = flush->batch.status;
+ if (submit->batch.status != VK_SUCCESS) {
+ result = submit->batch.status;
goto error_batch;
}
} else {
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
assert(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
- u_trace_flush(&cmd_buffers[i]->trace, flush, i == (cmd_buffer_count - 1));
+ u_trace_flush(&cmd_buffers[i]->trace, submit, i == (cmd_buffer_count - 1));
}
}
- flush->queue = queue;
+ submit->queue = queue;
- *out_flush_data = flush;
+ *out_submit = submit;
return VK_SUCCESS;
error_batch:
- anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
+ anv_reloc_list_finish(&submit->relocs, &device->vk.alloc);
error_reloc_list:
- anv_bo_pool_free(&device->utrace_bo_pool, flush->batch_bo);
+ anv_bo_pool_free(&device->utrace_bo_pool, submit->batch_bo);
error_batch_buf:
- anv_bo_pool_free(&device->utrace_bo_pool, flush->trace_bo);
+ anv_bo_pool_free(&device->utrace_bo_pool, submit->trace_bo);
error_trace_buf:
- vk_sync_destroy(&device->vk, flush->sync);
+ vk_sync_destroy(&device->vk, submit->sync);
error_sync:
- vk_free(&device->vk.alloc, flush);
+ intel_ds_flush_data_fini(&submit->ds);
+ vk_free(&device->vk.alloc, submit);
return result;
}
@@ -200,7 +215,7 @@ anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
struct anv_bo *bo = NULL;
UNUSED VkResult result =
anv_bo_pool_alloc(&device->utrace_bo_pool,
- align_u32(size_b, 4096),
+ align(size_b, 4096),
&bo);
assert(result == VK_SUCCESS);
@@ -222,15 +237,17 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs,
void *timestamps, unsigned idx,
bool end_of_pipe)
{
- struct anv_cmd_buffer *cmd_buffer =
- container_of(ut, struct anv_cmd_buffer, trace);
- struct anv_device *device = cmd_buffer->device;
+ struct anv_device *device =
+ container_of(ut->utctx, struct anv_device, ds.trace_context);
+ struct anv_batch *batch =
+ cs != NULL ? cs :
+ &container_of(ut, struct anv_cmd_buffer, trace)->batch;
struct anv_bo *bo = timestamps;
enum anv_timestamp_capture_type capture_type =
(end_of_pipe) ? ANV_TIMESTAMP_CAPTURE_END_OF_PIPE
: ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE;
- device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device,
+ device->physical->cmd_emit_timestamp(batch, device,
(struct anv_address) {
.bo = bo,
.offset = idx * sizeof(uint64_t) },
@@ -244,13 +261,13 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_bo *bo = timestamps;
- struct anv_utrace_flush_copy *flush = flush_data;
+ struct anv_utrace_submit *submit = flush_data;
/* Only need to stall on results for the first entry: */
if (idx == 0) {
UNUSED VkResult result =
vk_sync_wait(&device->vk,
- flush->sync,
+ submit->sync,
0,
VK_SYNC_WAIT_COMPLETE,
os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE));
@@ -271,7 +288,7 @@ anv_device_utrace_init(struct anv_device *device)
{
anv_bo_pool_init(&device->utrace_bo_pool, device, "utrace");
intel_ds_device_init(&device->ds, device->info, device->fd,
- device->physical->local_minor - 128,
+ device->physical->local_minor,
INTEL_DS_API_VULKAN);
u_trace_context_init(&device->ds.trace_context,
&device->ds,
@@ -279,14 +296,14 @@ anv_device_utrace_init(struct anv_device *device)
anv_utrace_destroy_ts_buffer,
anv_utrace_record_ts,
anv_utrace_read_ts,
- anv_utrace_delete_flush_data);
+ anv_utrace_delete_submit);
for (uint32_t q = 0; q < device->queue_count; q++) {
struct anv_queue *queue = &device->queues[q];
intel_ds_device_init_queue(&device->ds, &queue->ds, "%s%u",
- intel_engines_class_to_string(queue->family->engine_class),
- queue->index_in_family);
+ intel_engines_class_to_string(queue->family->engine_class),
+ queue->vk.index_in_family);
}
}
@@ -319,6 +336,8 @@ anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits)
{ .anv = ANV_PIPE_HDC_PIPELINE_FLUSH_BIT, .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, },
{ .anv = ANV_PIPE_STALL_AT_SCOREBOARD_BIT, .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, },
{ .anv = ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, .ds = INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, },
+ { .anv = ANV_PIPE_PSS_STALL_SYNC_BIT, .ds = INTEL_DS_PSS_STALL_SYNC_BIT, },
+ { .anv = ANV_PIPE_END_OF_PIPE_SYNC_BIT, .ds = INTEL_DS_END_OF_PIPE_BIT, },
};
enum intel_ds_stall_flag ret = 0;
@@ -329,3 +348,140 @@ anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits)
return ret;
}
+
+/* vkCmdBeginDebugUtilsLabelEXT entry point: forwards the label to the common
+ * Vulkan runtime handler, then emits the begin-annotation tracepoint on the
+ * command buffer's trace.
+ */
+void
+anv_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer,
+                               const VkDebugUtilsLabelEXT *pLabelInfo)
+{
+   vk_common_CmdBeginDebugUtilsLabelEXT(_commandBuffer, pLabelInfo);
+
+   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _commandBuffer);
+   trace_intel_begin_cmd_buffer_annotation(&cmd_buffer->trace);
+}
+
+/* vkCmdEndDebugUtilsLabelEXT entry point.  When the command buffer has at
+ * least one recorded label, the end-annotation tracepoint is emitted with the
+ * name of the last one before the common runtime handler is invoked.
+ */
+void
+anv_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer)
+{
+   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _commandBuffer);
+   const bool has_label = cmd_buffer->vk.labels.size > 0;
+
+   if (has_label) {
+      /* Read the label before calling the common handler; presumably that
+       * handler pops it (NOTE(review): confirm against the runtime code).
+       */
+      const VkDebugUtilsLabelEXT *top =
+         util_dynarray_top_ptr(&cmd_buffer->vk.labels, VkDebugUtilsLabelEXT);
+      const char *name = top->pLabelName;
+
+      trace_intel_end_cmd_buffer_annotation(&cmd_buffer->trace,
+                                            strlen(name),
+                                            name);
+   }
+
+   vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer);
+}
+
+/* Record one queue-level tracepoint in a small dedicated batch and hand that
+ * batch to the KMD backend for execution on the given queue.
+ *
+ * frame: use the frame begin/end tracepoints instead of the queue annotation
+ *        ones.
+ * begin: emit the "begin" half of the selected pair, otherwise the "end" half.
+ * label: only read when ending a queue annotation (!frame && !begin).
+ *
+ * Nothing is reported to the caller: on any failure the resources acquired so
+ * far are released in reverse order and the function simply returns.
+ */
+void
+anv_queue_trace(struct anv_queue *queue, const char *label, bool frame, bool begin)
+{
+   struct anv_device *device = queue->device;
+
+   struct anv_utrace_submit *submit =
+      vk_zalloc(&device->vk.alloc, sizeof(*submit), 8,
+                VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (submit == NULL)
+      return;
+
+   submit->queue = queue;
+   intel_ds_flush_data_init(&submit->ds, &queue->ds, queue->ds.submission_id);
+
+   if (vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
+                      0, 0, &submit->sync) != VK_SUCCESS)
+      goto fail_flush_data;
+
+   if (anv_bo_pool_alloc(&device->utrace_bo_pool, 4096,
+                         &submit->batch_bo) != VK_SUCCESS)
+      goto fail_sync;
+
+   if (anv_reloc_list_init(&submit->relocs, &device->vk.alloc) != VK_SUCCESS)
+      goto fail_batch_bo;
+
+   /* Back the batch with the BO that was just taken from the utrace pool. */
+   submit->batch.alloc = &device->vk.alloc;
+   submit->batch.relocs = &submit->relocs;
+   anv_batch_set_storage(&submit->batch,
+                         (struct anv_address) { .bo = submit->batch_bo, },
+                         submit->batch_bo->map, submit->batch_bo->size);
+
+   /* Exactly one of the four tracepoints is emitted. */
+   if (frame && begin) {
+      trace_intel_begin_frame(&submit->ds.trace, &submit->batch);
+   } else if (frame) {
+      trace_intel_end_frame(&submit->ds.trace, &submit->batch,
+                            device->debug_frame_desc->frame_id);
+   } else if (begin) {
+      trace_intel_begin_queue_annotation(&submit->ds.trace, &submit->batch);
+   } else {
+      trace_intel_end_queue_annotation(&submit->ds.trace,
+                                       &submit->batch,
+                                       strlen(label),
+                                       label);
+   }
+
+   anv_batch_emit(&submit->batch, GFX8_MI_BATCH_BUFFER_END, bbs);
+   anv_batch_emit(&submit->batch, GFX8_MI_NOOP, noop);
+
+   /* Checked once, after everything has been emitted into the batch. */
+   if (submit->batch.status != VK_SUCCESS)
+      goto fail_relocs;
+
+   /* submit is passed to u_trace as the flush data; presumably it is later
+    * released through the delete callback registered at context init
+    * (NOTE(review): confirm ownership after this point).
+    */
+   u_trace_flush(&submit->ds.trace, submit, true);
+
+   /* The backend submission runs with the device mutex held. */
+   pthread_mutex_lock(&device->mutex);
+   device->kmd_backend->queue_exec_trace(queue, submit);
+   pthread_mutex_unlock(&device->mutex);
+
+   return;
+
+ fail_relocs:
+   anv_reloc_list_finish(&submit->relocs, &device->vk.alloc);
+ fail_batch_bo:
+   anv_bo_pool_free(&device->utrace_bo_pool, submit->batch_bo);
+ fail_sync:
+   vk_sync_destroy(&device->vk, submit->sync);
+ fail_flush_data:
+   intel_ds_flush_data_fini(&submit->ds);
+   vk_free(&device->vk.alloc, submit);
+}
+
+/* vkQueueBeginDebugUtilsLabelEXT entry point: runs the common runtime handler,
+ * then submits a begin-of-annotation trace for the queue via
+ * anv_queue_trace().
+ */
+void
+anv_QueueBeginDebugUtilsLabelEXT(VkQueue _queue,
+                                 const VkDebugUtilsLabelEXT *pLabelInfo)
+{
+   vk_common_QueueBeginDebugUtilsLabelEXT(_queue, pLabelInfo);
+
+   VK_FROM_HANDLE(anv_queue, queue, _queue);
+   const bool is_frame = false;
+   const bool is_begin = true;
+
+   anv_queue_trace(queue, pLabelInfo->pLabelName, is_frame, is_begin);
+}
+
+/* vkQueueEndDebugUtilsLabelEXT entry point.  If the queue has at least one
+ * recorded label, an end-of-annotation trace carrying the last label's name
+ * is submitted and the device trace context is processed; the common runtime
+ * handler is always called afterwards.
+ */
+void
+anv_QueueEndDebugUtilsLabelEXT(VkQueue _queue)
+{
+   VK_FROM_HANDLE(anv_queue, queue, _queue);
+   const bool has_label = queue->vk.labels.size > 0;
+
+   if (has_label) {
+      const VkDebugUtilsLabelEXT *top =
+         util_dynarray_top_ptr(&queue->vk.labels, VkDebugUtilsLabelEXT);
+      const bool is_frame = false;
+      const bool is_begin = false;
+
+      anv_queue_trace(queue, top->pLabelName, is_frame, is_begin);
+
+      u_trace_context_process(&queue->device->ds.trace_context, true);
+   }
+
+   vk_common_QueueEndDebugUtilsLabelEXT(_queue);
+}
diff --git a/lib/mesa/src/intel/vulkan/anv_video.c b/lib/mesa/src/intel/vulkan/anv_video.c
new file mode 100644
index 000000000..38a3b09b2
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/anv_video.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright © 2021 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_private.h"
+
+#include "vk_video/vulkan_video_codecs_common.h"
+
+/* vkCreateVideoSessionKHR entry point: allocates a zeroed anv_video_session,
+ * initializes its common runtime part from pCreateInfo and returns the handle.
+ *
+ * Returns VK_ERROR_OUT_OF_HOST_MEMORY when the allocation fails, or whatever
+ * vk_video_session_init() reports on failure (the allocation is freed then).
+ */
+VkResult
+anv_CreateVideoSessionKHR(VkDevice _device,
+                          const VkVideoSessionCreateInfoKHR *pCreateInfo,
+                          const VkAllocationCallbacks *pAllocator,
+                          VkVideoSessionKHR *pVideoSession)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+
+   struct anv_video_session *session =
+      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*session), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (session == NULL)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   memset(session, 0, sizeof(*session));
+
+   const VkResult init_result =
+      vk_video_session_init(&device->vk, &session->vk, pCreateInfo);
+   if (init_result == VK_SUCCESS) {
+      *pVideoSession = anv_video_session_to_handle(session);
+      return VK_SUCCESS;
+   }
+
+   vk_free2(&device->vk.alloc, pAllocator, session);
+   return init_result;
+}
+
+/* vkDestroyVideoSessionKHR entry point: finishes the session's base object and
+ * frees it.  A null handle is accepted and ignored.
+ */
+void
+anv_DestroyVideoSessionKHR(VkDevice _device,
+                           VkVideoSessionKHR _session,
+                           const VkAllocationCallbacks *pAllocator)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_video_session, vid, _session);
+
+   if (_session) {
+      vk_object_base_finish(&vid->vk.base);
+      vk_free2(&device->vk.alloc, pAllocator, vid);
+   }
+}
+
+/* vkCreateVideoSessionParametersKHR entry point: allocates the parameters
+ * object and initializes its common runtime part for the given session,
+ * optionally starting from a template parameters object.
+ *
+ * Returns VK_ERROR_OUT_OF_HOST_MEMORY when the allocation fails, or whatever
+ * vk_video_session_parameters_init() reports on failure (the allocation is
+ * freed then).
+ */
+VkResult
+anv_CreateVideoSessionParametersKHR(VkDevice _device,
+                                    const VkVideoSessionParametersCreateInfoKHR *pCreateInfo,
+                                    const VkAllocationCallbacks *pAllocator,
+                                    VkVideoSessionParametersKHR *pVideoSessionParameters)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_video_session, vid, pCreateInfo->videoSession);
+   ANV_FROM_HANDLE(anv_video_session_params, templ, pCreateInfo->videoSessionParametersTemplate);
+
+   struct anv_video_session_params *new_params =
+      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*new_params), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (new_params == NULL)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* The template is optional; pass NULL when none was provided. */
+   const VkResult init_result =
+      vk_video_session_parameters_init(&device->vk,
+                                       &new_params->vk,
+                                       &vid->vk,
+                                       templ ? &templ->vk : NULL,
+                                       pCreateInfo);
+   if (init_result == VK_SUCCESS) {
+      *pVideoSessionParameters = anv_video_session_params_to_handle(new_params);
+      return VK_SUCCESS;
+   }
+
+   vk_free2(&device->vk.alloc, pAllocator, new_params);
+   return init_result;
+}
+
+/* vkDestroyVideoSessionParametersKHR entry point: finishes the common runtime
+ * part of the parameters object and frees it.  A null handle is accepted and
+ * ignored.
+ */
+void
+anv_DestroyVideoSessionParametersKHR(VkDevice _device,
+                                     VkVideoSessionParametersKHR _params,
+                                     const VkAllocationCallbacks *pAllocator)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_video_session_params, params, _params);
+
+   if (_params) {
+      vk_video_session_parameters_finish(&device->vk, &params->vk);
+      vk_free2(&device->vk.alloc, pAllocator, params);
+   }
+}
+
+/* vkGetPhysicalDeviceVideoCapabilitiesKHR entry point.
+ *
+ * Fills the codec-independent capabilities, then the decode capabilities and
+ * the H.264 decode capabilities when the corresponding structures are chained
+ * on pCapabilities->pNext.  Codec operations other than H.264 decode only get
+ * the codec-independent fields.  Always returns VK_SUCCESS.
+ */
+VkResult
+anv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice,
+                                          const VkVideoProfileInfoKHR *pVideoProfile,
+                                          VkVideoCapabilitiesKHR *pCapabilities)
+{
+   pCapabilities->minBitstreamBufferOffsetAlignment = 32;
+   pCapabilities->minBitstreamBufferSizeAlignment = 32;
+   pCapabilities->pictureAccessGranularity.width = ANV_MB_WIDTH;
+   pCapabilities->pictureAccessGranularity.height = ANV_MB_HEIGHT;
+   pCapabilities->minCodedExtent.width = ANV_MB_WIDTH;
+   pCapabilities->minCodedExtent.height = ANV_MB_HEIGHT;
+   pCapabilities->maxCodedExtent.width = 4096;
+   pCapabilities->maxCodedExtent.height = 4096;
+   pCapabilities->flags = VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
+
+   struct VkVideoDecodeCapabilitiesKHR *dec_caps = (struct VkVideoDecodeCapabilitiesKHR *)
+      vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_CAPABILITIES_KHR);
+   if (dec_caps)
+      dec_caps->flags = VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR;
+
+   switch (pVideoProfile->videoCodecOperation) {
+   case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
+      struct VkVideoDecodeH264CapabilitiesKHR *ext = (struct VkVideoDecodeH264CapabilitiesKHR *)
+         vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_H264_CAPABILITIES_KHR);
+      pCapabilities->maxDpbSlots = 17;
+      pCapabilities->maxActiveReferencePictures = 16;
+
+      /* Same treatment as dec_caps above: the lookup yields NULL when the
+       * structure is not chained, so do not dereference it blindly.
+       */
+      if (ext) {
+         ext->fieldOffsetGranularity.x = 0;
+         ext->fieldOffsetGranularity.y = 0;
+         ext->maxLevelIdc = 51;
+      }
+      strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME);
+      pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION;
+      break;
+   }
+   default:
+      break;
+   }
+   return VK_SUCCESS;
+}
+
+/* vkGetPhysicalDeviceVideoFormatPropertiesKHR entry point.
+ *
+ * A single format is advertised: VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, 2D,
+ * optimal tiling, with the usage flags echoed back from pVideoFormatInfo.
+ *
+ * Follows the usual Vulkan enumeration contract:
+ *  - pVideoFormatProperties == NULL: only the number of formats is returned.
+ *  - otherwise *pVideoFormatPropertyCount is the capacity of the array on
+ *    input and the number of elements written on output; VK_INCOMPLETE is
+ *    returned when the array is too small to hold the format.
+ */
+VkResult
+anv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice,
+                                              const VkPhysicalDeviceVideoFormatInfoKHR *pVideoFormatInfo,
+                                              uint32_t *pVideoFormatPropertyCount,
+                                              VkVideoFormatPropertiesKHR *pVideoFormatProperties)
+{
+   const uint32_t format_count = 1;
+
+   if (!pVideoFormatProperties) {
+      *pVideoFormatPropertyCount = format_count;
+      return VK_SUCCESS;
+   }
+
+   /* Never write past the capacity the caller told us about. */
+   if (*pVideoFormatPropertyCount < format_count) {
+      *pVideoFormatPropertyCount = 0;
+      return VK_INCOMPLETE;
+   }
+
+   *pVideoFormatPropertyCount = format_count;
+
+   pVideoFormatProperties[0].format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
+   pVideoFormatProperties[0].imageType = VK_IMAGE_TYPE_2D;
+   pVideoFormatProperties[0].imageTiling = VK_IMAGE_TILING_OPTIMAL;
+   pVideoFormatProperties[0].imageUsageFlags = pVideoFormatInfo->imageUsage;
+   return VK_SUCCESS;
+}
+
+/* Fill the four H.264 decode session memory requirements into mem_reqs[0..3].
+ *
+ * Every entry is sized as a multiple of (width in macroblocks * 64), uses a
+ * 4096 byte alignment and accepts the memory types given in memory_types.
+ * mem_reqs must have room for at least four elements.
+ */
+static void
+get_h264_video_session_mem_reqs(struct anv_video_session *vid,
+                                VkVideoSessionMemoryRequirementsKHR *mem_reqs,
+                                uint32_t memory_types)
+{
+   const uint32_t width_in_mb = align(vid->vk.max_coded.width, ANV_MB_WIDTH) / ANV_MB_WIDTH;
+
+   /* Per-buffer multiplier applied to (width in macroblocks * 64). */
+   const struct {
+      uint32_t bind_index;
+      uint32_t scale;
+   } rows[] = {
+      { ANV_VID_MEM_H264_INTRA_ROW_STORE,          1 }, /* intra row store */
+      { ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE, 4 }, /* deblocking filter row store */
+      { ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH,      2 }, /* bsd mpc row scratch */
+      { ANV_VID_MEM_H264_MPR_ROW_SCRATCH,          2 }, /* mpr row scratch */
+   };
+
+   for (unsigned i = 0; i < sizeof(rows) / sizeof(rows[0]); i++) {
+      mem_reqs[i].memoryBindIndex = rows[i].bind_index;
+      mem_reqs[i].memoryRequirements.size = width_in_mb * 64 * rows[i].scale;
+      mem_reqs[i].memoryRequirements.alignment = 4096;
+      mem_reqs[i].memoryRequirements.memoryTypeBits = memory_types;
+   }
+}
+
+/* vkGetVideoSessionMemoryRequirementsKHR entry point.
+ *
+ * Follows the usual Vulkan enumeration contract:
+ *  - mem_reqs == NULL: only the number of memory bindings is returned.
+ *  - otherwise *pVideoSessionMemoryRequirementsCount is the capacity of
+ *    mem_reqs on input and the number of elements written on output;
+ *    VK_INCOMPLETE is returned when not every binding fit.
+ *
+ * Only the memoryBindIndex and memoryRequirements members of the caller's
+ * structures are written.
+ */
+VkResult
+anv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device,
+                                         VkVideoSessionKHR videoSession,
+                                         uint32_t *pVideoSessionMemoryRequirementsCount,
+                                         VkVideoSessionMemoryRequirementsKHR *mem_reqs)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_video_session, vid, videoSession);
+
+   uint32_t available = 0;
+   switch (vid->vk.op) {
+   case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
+      available = ANV_VIDEO_MEM_REQS_H264;
+      break;
+   default:
+      unreachable("unknown codec");
+   }
+
+   if (!mem_reqs) {
+      *pVideoSessionMemoryRequirementsCount = available;
+      return VK_SUCCESS;
+   }
+
+   /* Clamp to what the caller's array can actually hold. */
+   const uint32_t capacity = *pVideoSessionMemoryRequirementsCount;
+   const uint32_t written = capacity < available ? capacity : available;
+
+   uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1;
+   switch (vid->vk.op) {
+   case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
+      /* The helper always produces the full set, so build it in a scratch
+       * array and copy out only as many entries as fit.
+       */
+      VkVideoSessionMemoryRequirementsKHR all_reqs[ANV_VIDEO_MEM_REQS_H264];
+      get_h264_video_session_mem_reqs(vid, all_reqs, memory_types);
+      for (uint32_t i = 0; i < written; i++) {
+         mem_reqs[i].memoryBindIndex = all_reqs[i].memoryBindIndex;
+         mem_reqs[i].memoryRequirements = all_reqs[i].memoryRequirements;
+      }
+      break;
+   }
+   default:
+      unreachable("unknown codec");
+   }
+
+   *pVideoSessionMemoryRequirementsCount = written;
+   return written < available ? VK_INCOMPLETE : VK_SUCCESS;
+}
+
+/* vkUpdateVideoSessionParametersKHR entry point: delegates the update to the
+ * common runtime and returns its result.  _device is not used.
+ */
+VkResult
+anv_UpdateVideoSessionParametersKHR(VkDevice _device,
+                                    VkVideoSessionParametersKHR _params,
+                                    const VkVideoSessionParametersUpdateInfoKHR *pUpdateInfo)
+{
+   ANV_FROM_HANDLE(anv_video_session_params, session_params, _params);
+
+   const VkResult update_result =
+      vk_video_session_parameters_update(&session_params->vk, pUpdateInfo);
+   return update_result;
+}
+
+/* Record one application-provided memory binding (memory object, offset and
+ * size) into a session memory slot.
+ */
+static void
+copy_bind(struct anv_vid_mem *slot,
+          const VkBindVideoSessionMemoryInfoKHR *bind)
+{
+   slot->mem    = anv_device_memory_from_handle(bind->memory);
+   slot->offset = bind->memoryOffset;
+   slot->size   = bind->memorySize;
+}
+
+/* vkBindVideoSessionMemoryKHR entry point: stores each provided binding in
+ * the session slot selected by its memoryBindIndex.  Always returns
+ * VK_SUCCESS.
+ *
+ * The bindings of a session may be supplied over several calls, so any count
+ * up to the number of H.264 bindings is accepted rather than requiring all of
+ * them at once.
+ */
+VkResult
+anv_BindVideoSessionMemoryKHR(VkDevice _device,
+                              VkVideoSessionKHR videoSession,
+                              uint32_t bind_mem_count,
+                              const VkBindVideoSessionMemoryInfoKHR *bind_mem)
+{
+   ANV_FROM_HANDLE(anv_video_session, vid, videoSession);
+
+   switch (vid->vk.op) {
+   case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
+      /* Bind indices are unique within one call, so there can never be more
+       * entries than bindings.
+       */
+      assert(bind_mem_count <= ANV_VIDEO_MEM_REQS_H264);
+      for (unsigned i = 0; i < bind_mem_count; i++) {
+         copy_bind(&vid->vid_mem[bind_mem[i].memoryBindIndex], &bind_mem[i]);
+      }
+      break;
+   default:
+      unreachable("unknown codec");
+   }
+   return VK_SUCCESS;
+}
diff --git a/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c b/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c
index 3958452f0..4c675e985 100644
--- a/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c
+++ b/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c
@@ -31,6 +31,7 @@
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
+#include "genxml/genX_rt_pack.h"
#if GFX_VERx10 >= 125
@@ -167,7 +168,7 @@ get_gpu_size_estimate(const VkAccelerationStructureBuildGeometryInfoKHR *pInfo,
struct MKSizeEstimate est = {};
uint64_t size = sizeof(BVHBase);
- size = align_u64(size, 64);
+ size = align64(size, 64);
/* Must immediately follow BVHBase because we use fixed offset to nodes. */
est.node_data_start = size;
@@ -258,25 +259,25 @@ get_gpu_size_estimate(const VkAccelerationStructureBuildGeometryInfoKHR *pInfo,
unreachable("Unsupported acceleration structure type");
}
- size = align_u64(size, 64);
+ size = align64(size, 64);
est.instance_descs_start = size;
size += sizeof(struct InstanceDesc) * num_instances;
est.geo_meta_data_start = size;
size += sizeof(struct GeoMetaData) * pInfo->geometryCount;
- size = align_u64(size, 64);
+ size = align64(size, 64);
- assert(size == align_u64(size, 64));
+ assert(size == align64(size, 64));
est.back_pointer_start = size;
const bool alloc_backpointers = false; /* RT TODO */
if (alloc_backpointers) {
size += est.max_inner_nodes * sizeof(uint32_t);
- size = align_u64(size, 64);
+ size = align64(size, 64);
}
assert(size < UINT32_MAX);
- est.sizeTotal = align_u64(size, 64);
+ est.sizeTotal = align64(size, 64);
return est;
}
@@ -392,62 +393,6 @@ genX(GetAccelerationStructureBuildSizesKHR)(
pSizeInfo->updateScratchSize = gpu_size_info.updateScratchSize;
}
-VkResult
-genX(CreateAccelerationStructureKHR)(
- VkDevice _device,
- const VkAccelerationStructureCreateInfoKHR* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkAccelerationStructureKHR* pAccelerationStructure)
-{
- ANV_FROM_HANDLE(anv_device, device, _device);
- ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer);
- struct anv_acceleration_structure *accel;
-
- accel = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*accel), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (accel == NULL)
- return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &accel->base,
- VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR);
-
- accel->size = pCreateInfo->size;
- accel->address = anv_address_add(buffer->address, pCreateInfo->offset);
-
- *pAccelerationStructure = anv_acceleration_structure_to_handle(accel);
-
- return VK_SUCCESS;
-}
-
-void
-genX(DestroyAccelerationStructureKHR)(
- VkDevice _device,
- VkAccelerationStructureKHR accelerationStructure,
- const VkAllocationCallbacks* pAllocator)
-{
- ANV_FROM_HANDLE(anv_device, device, _device);
- ANV_FROM_HANDLE(anv_acceleration_structure, accel, accelerationStructure);
-
- if (!accel)
- return;
-
- vk_object_base_finish(&accel->base);
- vk_free2(&device->vk.alloc, pAllocator, accel);
-}
-
-VkDeviceAddress
-genX(GetAccelerationStructureDeviceAddressKHR)(
- VkDevice device,
- const VkAccelerationStructureDeviceAddressInfoKHR* pInfo)
-{
- ANV_FROM_HANDLE(anv_acceleration_structure, accel,
- pInfo->accelerationStructure);
-
- assert(!anv_address_is_null(accel->address));
-
- return anv_address_physical(accel->address);
-}
-
void
genX(GetDeviceAccelerationStructureCompatibilityKHR)(
VkDevice _device,
@@ -703,12 +648,12 @@ cmd_build_acceleration_structures(
const uint32_t *pMaxPrimitiveCounts =
ppMaxPrimitiveCounts ? ppMaxPrimitiveCounts[i] : NULL;
- ANV_FROM_HANDLE(anv_acceleration_structure, dst_accel,
+ ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel,
pInfo->dstAccelerationStructure);
bs->build_method = device->bvh_build_method;
- bs->bvh_addr = dst_accel->address;
+ bs->bvh_addr = anv_address_from_u64(vk_acceleration_structure_get_va(dst_accel));
bs->estimate = get_gpu_size_estimate(pInfo, pBuildRangeInfos,
pMaxPrimitiveCounts);
@@ -872,6 +817,17 @@ cmd_build_acceleration_structures(
&data, sizeof(data));
}
+ if (anv_cmd_buffer_is_render_queue(cmd_buffer))
+ genX(flush_pipeline_select_gpgpu)(cmd_buffer);
+
+ /* Due to the nature of GRL and its heavy use of jumps/predication, we
+ * cannot tell exactly in what order the CFE_STATE we insert are going to
+ * be executed. So always use the largest possible size.
+ */
+ genX(cmd_buffer_ensure_cfe_state)(
+ cmd_buffer,
+ cmd_buffer->device->physical->max_grl_scratch_size);
+
/* Round 1 : init_globals kernel */
genX(grl_misc_batched_init_globals)(
cmd_buffer,
@@ -1162,24 +1118,26 @@ genX(CmdCopyAccelerationStructureKHR)(
const VkCopyAccelerationStructureInfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
- ANV_FROM_HANDLE(anv_acceleration_structure, src_accel, pInfo->src);
- ANV_FROM_HANDLE(anv_acceleration_structure, dst_accel, pInfo->dst);
+ ANV_FROM_HANDLE(vk_acceleration_structure, src_accel, pInfo->src);
+ ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel, pInfo->dst);
assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR ||
pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR);
if (pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR) {
- struct anv_address src_size_addr = anv_address_add(
- src_accel->address,
- offsetof(struct BVHBase, Meta.allocationSize));
- genX(grl_copy_clone_indirect)(cmd_buffer,
- anv_address_physical(dst_accel->address),
- anv_address_physical(src_accel->address),
- anv_address_physical(src_size_addr));
+ uint64_t src_size_addr =
+ vk_acceleration_structure_get_va(src_accel) +
+ offsetof(struct BVHBase, Meta.allocationSize);
+ genX(grl_copy_clone_indirect)(
+ cmd_buffer,
+ vk_acceleration_structure_get_va(dst_accel),
+ vk_acceleration_structure_get_va(src_accel),
+ src_size_addr);
} else {
- genX(grl_copy_compact)(cmd_buffer,
- anv_address_physical(dst_accel->address),
- anv_address_physical(src_accel->address));
+ genX(grl_copy_compact)(
+ cmd_buffer,
+ vk_acceleration_structure_get_va(dst_accel),
+ vk_acceleration_structure_get_va(src_accel));
}
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_END_OF_PIPE_SYNC_BIT;
@@ -1191,19 +1149,20 @@ genX(CmdCopyAccelerationStructureToMemoryKHR)(
const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
- ANV_FROM_HANDLE(anv_acceleration_structure, src_accel, pInfo->src);
+ ANV_FROM_HANDLE(vk_acceleration_structure, src_accel, pInfo->src);
struct anv_device *device = cmd_buffer->device;
- struct anv_address src_size_addr = anv_address_add(
- src_accel->address,
- offsetof(struct BVHBase, Meta.allocationSize));
+ uint64_t src_size_addr =
+ vk_acceleration_structure_get_va(src_accel) +
+ offsetof(struct BVHBase, Meta.allocationSize);
assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR);
- genX(grl_copy_serialize_indirect)(cmd_buffer,
- pInfo->dst.deviceAddress,
- anv_address_physical(src_accel->address),
- anv_address_physical(device->rt_uuid_addr),
- anv_address_physical(src_size_addr));
+ genX(grl_copy_serialize_indirect)(
+ cmd_buffer,
+ pInfo->dst.deviceAddress,
+ vk_acceleration_structure_get_va(src_accel),
+ anv_address_physical(device->rt_uuid_addr),
+ src_size_addr);
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_END_OF_PIPE_SYNC_BIT;
}
@@ -1214,16 +1173,17 @@ genX(CmdCopyMemoryToAccelerationStructureKHR)(
const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
- ANV_FROM_HANDLE(anv_acceleration_structure, dst_accel, pInfo->dst);
+ ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel, pInfo->dst);
assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR);
uint64_t src_size_addr = pInfo->src.deviceAddress +
offsetof(struct SerializationHeader, DeserializedSizeInBytes);
- genX(grl_copy_deserialize_indirect)(cmd_buffer,
- anv_address_physical(dst_accel->address),
- pInfo->src.deviceAddress,
- src_size_addr);
+ genX(grl_copy_deserialize_indirect)(
+ cmd_buffer,
+ vk_acceleration_structure_get_va(dst_accel),
+ pInfo->src.deviceAddress,
+ src_size_addr);
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_END_OF_PIPE_SYNC_BIT;
}
diff --git a/lib/mesa/src/intel/vulkan/genX_cmd_draw_generated_indirect.h b/lib/mesa/src/intel/vulkan/genX_cmd_draw_generated_indirect.h
new file mode 100644
index 000000000..ccb1bd7a2
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/genX_cmd_draw_generated_indirect.h
@@ -0,0 +1,750 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef GENX_CMD_GENERATED_INDIRECT_DRAW_H
+#define GENX_CMD_GENERATED_INDIRECT_DRAW_H
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include "util/macros.h"
+
+#include "common/intel_genX_state.h"
+
+#include "anv_private.h"
+#include "anv_generated_indirect_draws.h"
+
+/* This is the maximum number of items a fragment shader can generate, as
+ * limited by the viewport size.
+ */
+#define MAX_GENERATED_DRAW_COUNT (8192 * 8192)
+
+static void
+genX(cmd_buffer_emit_generate_draws_pipeline)(struct anv_cmd_buffer *cmd_buffer)
+{
+ struct anv_batch *batch = &cmd_buffer->generation_batch;
+ struct anv_device *device = cmd_buffer->device;
+ const struct anv_shader_bin *draw_kernel = device->generated_draw_kernel;
+ const struct brw_wm_prog_data *prog_data =
+ brw_wm_prog_data_const(draw_kernel->prog_data);
+
+ uint32_t *dw = anv_batch_emitn(batch,
+ 1 + 2 * GENX(VERTEX_ELEMENT_STATE_length),
+ GENX(3DSTATE_VERTEX_ELEMENTS));
+ /* You might think there is some shady stuff going here and you would be
+ * right. We're setting up 2 VERTEX_ELEMENT_STATE yet we're only providing
+ * 1 (positions) VERTEX_BUFFER_STATE later.
+ *
+ * Find more about how to set up a 3D pipeline with a fragment shader but
+ * without a vertex shader in blorp_emit_vertex_elements() in
+ * blorp_genX_exec.h.
+ */
+ GENX(VERTEX_ELEMENT_STATE_pack)(
+ batch, dw + 1, &(struct GENX(VERTEX_ELEMENT_STATE)) {
+ .VertexBufferIndex = 1,
+ .Valid = true,
+ .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
+ .SourceElementOffset = 0,
+ .Component0Control = VFCOMP_STORE_SRC,
+ .Component1Control = VFCOMP_STORE_0,
+ .Component2Control = VFCOMP_STORE_0,
+ .Component3Control = VFCOMP_STORE_0,
+ });
+ GENX(VERTEX_ELEMENT_STATE_pack)(
+ batch, dw + 3, &(struct GENX(VERTEX_ELEMENT_STATE)) {
+ .VertexBufferIndex = 0,
+ .Valid = true,
+ .SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT,
+ .SourceElementOffset = 0,
+ .Component0Control = VFCOMP_STORE_SRC,
+ .Component1Control = VFCOMP_STORE_SRC,
+ .Component2Control = VFCOMP_STORE_SRC,
+ .Component3Control = VFCOMP_STORE_1_FP,
+ });
+
+ anv_batch_emit(batch, GENX(3DSTATE_VF_STATISTICS), vf);
+ anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
+ sgvs.InstanceIDEnable = true;
+ sgvs.InstanceIDComponentNumber = COMP_1;
+ sgvs.InstanceIDElementOffset = 0;
+ }
+#if GFX_VER >= 11
+ anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
+#endif
+ anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
+ vfi.InstancingEnable = false;
+ vfi.VertexElementIndex = 0;
+ }
+ anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
+ vfi.InstancingEnable = false;
+ vfi.VertexElementIndex = 1;
+ }
+
+ anv_batch_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
+ topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
+ }
+
+ /* Emit URB setup. We tell it that the VS is active because we want it to
+ * allocate space for the VS. Even though one isn't run, we need VUEs to
+ * store the data that VF is going to pass to SOL.
+ */
+ const unsigned entry_size[4] = { DIV_ROUND_UP(32, 64), 1, 1, 1 };
+
+ genX(emit_l3_config)(batch, device, device->generated_draw_l3_config);
+
+ cmd_buffer->state.current_l3_config = device->generated_draw_l3_config;
+
+ enum intel_urb_deref_block_size deref_block_size;
+ genX(emit_urb_setup)(device, batch, device->generated_draw_l3_config,
+ VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
+ entry_size, &deref_block_size);
+
+ anv_batch_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
+ ps_blend.HasWriteableRT = true;
+ }
+
+ anv_batch_emit(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wm);
+
+#if GFX_VER >= 12
+ anv_batch_emit(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
+ db.DepthBoundsTestEnable = false;
+ db.DepthBoundsTestMinValue = 0.0;
+ db.DepthBoundsTestMaxValue = 1.0;
+ }
+#endif
+
+ anv_batch_emit(batch, GENX(3DSTATE_MULTISAMPLE), ms);
+ anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
+ sm.SampleMask = 0x1;
+ }
+
+ anv_batch_emit(batch, GENX(3DSTATE_VS), vs);
+ anv_batch_emit(batch, GENX(3DSTATE_HS), hs);
+ anv_batch_emit(batch, GENX(3DSTATE_TE), te);
+ anv_batch_emit(batch, GENX(3DSTATE_DS), DS);
+
+#if GFX_VERx10 >= 125
+ if (device->vk.enabled_extensions.NV_mesh_shader ||
+ device->vk.enabled_extensions.EXT_mesh_shader) {
+ anv_batch_emit(batch, GENX(3DSTATE_MESH_CONTROL), mesh);
+ anv_batch_emit(batch, GENX(3DSTATE_TASK_CONTROL), task);
+ }
+#endif
+
+ anv_batch_emit(batch, GENX(3DSTATE_STREAMOUT), so);
+
+ anv_batch_emit(batch, GENX(3DSTATE_GS), gs);
+
+ anv_batch_emit(batch, GENX(3DSTATE_CLIP), clip) {
+ clip.PerspectiveDivideDisable = true;
+ }
+
+ anv_batch_emit(batch, GENX(3DSTATE_SF), sf) {
+#if GFX_VER >= 12
+ sf.DerefBlockSize = deref_block_size;
+#endif
+ }
+
+ anv_batch_emit(batch, GENX(3DSTATE_RASTER), raster) {
+ raster.CullMode = CULLMODE_NONE;
+ }
+
+ anv_batch_emit(batch, GENX(3DSTATE_SBE), sbe) {
+ sbe.VertexURBEntryReadOffset = 1;
+ sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
+ sbe.VertexURBEntryReadLength = MAX2((prog_data->num_varying_inputs + 1) / 2, 1);
+ sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
+ sbe.ForceVertexURBEntryReadLength = true;
+ sbe.ForceVertexURBEntryReadOffset = true;
+ for (unsigned i = 0; i < 32; i++)
+ sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
+ }
+
+ anv_batch_emit(batch, GENX(3DSTATE_WM), wm);
+
+ anv_batch_emit(batch, GENX(3DSTATE_PS), ps) {
+ intel_set_ps_dispatch_state(&ps, device->info, prog_data,
+ 1 /* rasterization_samples */,
+ 0 /* msaa_flags */);
+
+ ps.VectorMaskEnable = prog_data->uses_vmask;
+
+ ps.BindingTableEntryCount = GFX_VER == 9 ? 1 : 0;
+ ps.PushConstantEnable = prog_data->base.nr_params > 0 ||
+ prog_data->base.ubo_ranges[0].length;
+
+ ps.DispatchGRFStartRegisterForConstantSetupData0 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
+ ps.DispatchGRFStartRegisterForConstantSetupData1 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
+ ps.DispatchGRFStartRegisterForConstantSetupData2 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
+
+ ps.KernelStartPointer0 = draw_kernel->kernel.offset +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 0);
+ ps.KernelStartPointer1 = draw_kernel->kernel.offset +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 1);
+ ps.KernelStartPointer2 = draw_kernel->kernel.offset +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 2);
+
+ ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1;
+ }
+
+ anv_batch_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) {
+ psx.PixelShaderValid = true;
+ psx.AttributeEnable = prog_data->num_varying_inputs > 0;
+ psx.PixelShaderIsPerSample = prog_data->persample_dispatch;
+ psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
+ psx.PixelShaderComputesStencil = prog_data->computed_stencil;
+ }
+
+ anv_batch_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
+ struct anv_state cc_state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 4 * GENX(CC_VIEWPORT_length), 32);
+ struct GENX(CC_VIEWPORT) cc_viewport = {
+ .MinimumDepth = 0.0f,
+ .MaximumDepth = 1.0f,
+ };
+ GENX(CC_VIEWPORT_pack)(NULL, cc_state.map, &cc_viewport);
+ cc.CCViewportPointer = cc_state.offset;
+ }
+
+#if GFX_VER >= 12
+ /* Disable Primitive Replication. */
+ anv_batch_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
+#endif
+
+ anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc);
+ anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_HS), alloc);
+ anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_DS), alloc);
+ anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS), alloc);
+ anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), alloc) {
+ alloc.ConstantBufferOffset = 0;
+ alloc.ConstantBufferSize = cmd_buffer->device->info->max_constant_urb_size_kb;
+ }
+
+#if GFX_VERx10 == 125
+ /* DG2: Wa_22011440098
+ * MTL: Wa_18022330953
+ *
+ * In 3D mode, after programming push constant alloc command immediately
+ * program push constant command(ZERO length) without any commit between
+ * them.
+ *
+ * Note that Wa_16011448509 isn't needed here as all address bits are zero.
+ */
+ anv_batch_emit(batch, GENX(3DSTATE_CONSTANT_ALL), c) {
+ /* Update empty push constants for all stages (bitmask = 11111b) */
+ c.ShaderUpdateEnable = 0x1f;
+ c.MOCS = anv_mocs(cmd_buffer->device, NULL, 0);
+ }
+#endif
+
+#if GFX_VER == 9
+ /* Allocate a binding table for Gfx9 for 2 reasons:
+ *
+ * 1. we need to emit a 3DSTATE_BINDING_TABLE_POINTERS_PS to make the
+ * HW apply the preceding 3DSTATE_CONSTANT_PS
+ *
+ * 2. Emitting an empty 3DSTATE_BINDING_TABLE_POINTERS_PS would cause RT
+ * writes (even though they're empty) to disturb later writes
+ * (probably due to RT cache)
+ *
+ * Our binding table only has one entry to the null surface.
+ */
+ uint32_t bt_offset;
+ cmd_buffer->generation_bt_state =
+ anv_cmd_buffer_alloc_binding_table(cmd_buffer, 1, &bt_offset);
+ if (cmd_buffer->generation_bt_state.map == NULL) {
+ VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
+ if (result != VK_SUCCESS)
+ return;
+
+ /* Re-emit state base addresses so we get the new surface state base
+ * address before we start emitting binding tables etc.
+ */
+ genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
+
+ cmd_buffer->generation_bt_state =
+ anv_cmd_buffer_alloc_binding_table(cmd_buffer, 1, &bt_offset);
+ assert(cmd_buffer->generation_bt_state.map != NULL);
+ }
+
+ uint32_t *bt_map = cmd_buffer->generation_bt_state.map;
+ bt_map[0] = anv_bindless_state_for_binding_table(
+ cmd_buffer->device->null_surface_state).offset + bt_offset;
+
+ cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
+#endif
+
+ cmd_buffer->state.gfx.vb_dirty = BITFIELD_BIT(0);
+ cmd_buffer->state.gfx.dirty |= ~(ANV_CMD_DIRTY_INDEX_BUFFER |
+ ANV_CMD_DIRTY_XFB_ENABLE);
+ cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
+ cmd_buffer->state.gfx.push_constant_stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+ vk_dynamic_graphics_state_dirty_all(&cmd_buffer->vk.dynamic_graphics_state);
+}
+
+/* Upload the three vertices of the generation rectangle and bind them as
+ * vertex buffer 0 in the generation batch.
+ *
+ * The rectangle spans x in [0, MIN2(draw_count, 8192)] and
+ * y in [0, DIV_ROUND_UP(draw_count, 8192)], at z = 0.
+ */
+static void
+genX(cmd_buffer_emit_generate_draws_vertex)(struct anv_cmd_buffer *cmd_buffer,
+                                            uint32_t draw_count)
+{
+   struct anv_batch *batch = &cmd_buffer->generation_batch;
+
+   /* Room for 3 vertices of 3 components each. */
+   struct anv_state vs_data_state =
+      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
+                                         9 * sizeof(uint32_t), 32);
+
+   const float x_min = 0.0f;
+   const float x_max = MIN2(draw_count, 8192);
+   const float y_min = 0.0f;
+   const float y_max = DIV_ROUND_UP(draw_count, 8192);
+   const float depth = 0.0f;
+
+   float *xyz = vs_data_state.map;
+
+   /* v0 */
+   xyz[0] = x_max;
+   xyz[1] = y_max;
+   xyz[2] = depth;
+
+   /* v1 */
+   xyz[3] = x_min;
+   xyz[4] = y_max;
+   xyz[5] = depth;
+
+   /* v2 */
+   xyz[6] = x_min;
+   xyz[7] = y_min;
+   xyz[8] = depth;
+
+   /* One header dword followed by a single VERTEX_BUFFER_STATE. */
+   uint32_t *dw = anv_batch_emitn(batch,
+                                  1 + GENX(VERTEX_BUFFER_STATE_length),
+                                  GENX(3DSTATE_VERTEX_BUFFERS));
+
+   struct GENX(VERTEX_BUFFER_STATE) vb_state = {
+      .VertexBufferIndex = 0,
+      .AddressModifyEnable = true,
+      .BufferStartingAddress = (struct anv_address) {
+         .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
+         .offset = vs_data_state.offset,
+      },
+      .BufferPitch = 3 * sizeof(float),
+      .BufferSize = 9 * sizeof(float),
+      .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
+#if GFX_VER >= 12
+      .L3BypassDisable = true,
+#endif
+   };
+   GENX(VERTEX_BUFFER_STATE_pack)(batch, dw + 1, &vb_state);
+}
+
+/* Emit, in the generation batch, the command that points the fragment
+ * stage's push constants at push_data_state.
+ *
+ * cmd_buffer: command buffer whose generation_batch receives the command.
+ * push_data_state: state holding the push data; its address is resolved
+ * against the device's dynamic state pool and its alloc_size, divided by 32
+ * and rounded up, is programmed as the read length.
+ *
+ * When GFX_VERx10 > 120 a 3DSTATE_CONSTANT_ALL (fragment stage only, one
+ * pointer) is emitted; otherwise a 3DSTATE_CONSTANT_PS using buffer slot 3.
+ */
+static void
+genX(cmd_buffer_emit_generated_push_data)(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_state push_data_state)
+{
+ struct anv_batch *batch = &cmd_buffer->generation_batch;
+ struct anv_address push_data_addr = anv_state_pool_state_address(
+ &cmd_buffer->device->dynamic_state_pool, push_data_state);
+
+ /* Don't use 3DSTATE_CONSTANT_ALL on Gfx12.0 due to Wa_16011448509 */
+#if GFX_VERx10 > 120
+ /* The packet is the 3DSTATE_CONSTANT_ALL header followed by one
+ * 3DSTATE_CONSTANT_ALL_DATA entry.
+ */
+ const uint32_t num_dwords = GENX(3DSTATE_CONSTANT_ALL_length) +
+ GENX(3DSTATE_CONSTANT_ALL_DATA_length);
+ uint32_t *dw =
+ anv_batch_emitn(batch, num_dwords,
+ GENX(3DSTATE_CONSTANT_ALL),
+ .ShaderUpdateEnable = BITFIELD_BIT(MESA_SHADER_FRAGMENT),
+ .PointerBufferMask = 0x1,
+ .MOCS = anv_mocs(cmd_buffer->device, NULL, 0));
+
+ /* Pack the single data entry right after the header dwords. */
+ GENX(3DSTATE_CONSTANT_ALL_DATA_pack)(
+ batch, dw + GENX(3DSTATE_CONSTANT_ALL_length),
+ &(struct GENX(3DSTATE_CONSTANT_ALL_DATA)) {
+ .PointerToConstantBuffer = push_data_addr,
+ .ConstantBufferReadLength = DIV_ROUND_UP(push_data_state.alloc_size, 32),
+ });
+#else
+ /* The Skylake PRM contains the following restriction:
+ *
+ * "The driver must ensure The following case does not occur
+ * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
+ * buffer 3 read length equal to zero committed followed by a
+ * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to
+ * zero committed."
+ *
+ * To avoid this, we program the highest slot.
+ */
+ anv_batch_emit(batch, GENX(3DSTATE_CONSTANT_PS), c) {
+ c.MOCS = anv_mocs(cmd_buffer->device, NULL, 0);
+ c.ConstantBody.ReadLength[3] = DIV_ROUND_UP(push_data_state.alloc_size, 32);
+ c.ConstantBody.Buffer[3] = push_data_addr;
+ }
+#endif
+}
+
+/* Emit one generation draw into the generation batch.
+ *
+ * The function uploads the generation rectangle for item_count items, fills
+ * a freshly allocated anv_generated_indirect_params with the parameters
+ * below, binds it as fragment push data and emits a 3-vertex RECTLIST
+ * 3DPRIMITIVE.
+ *
+ * generated_cmds_addr: where the generated commands are to be written.
+ * generated_cmd_stride: stride between generated commands; stored divided by
+ * 4 in bits 16+ of the flags field.
+ * indirect_data_addr / indirect_data_stride: location and stride of the
+ * indirect draw parameters.
+ * draw_id_addr: address of the draw id buffer (may be null).
+ * item_base: stored as draw_base.
+ * item_count: number of items handled by this generation draw.
+ * count_addr: optional address of the draw count; when not null, the count
+ * is copied into the push data by the command streamer.
+ * max_count: stored as max_draw_count, and as draw_count when count_addr is
+ * null.
+ * indexed: sets ANV_GENERATED_FLAG_INDEXED.
+ *
+ * Returns the CPU mapping of the push data so the caller can patch it later
+ * (the prev and draw.end_addr fields are not written here).
+ */
+static struct anv_generated_indirect_params *
+genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_address generated_cmds_addr,
+ uint32_t generated_cmd_stride,
+ struct anv_address indirect_data_addr,
+ uint32_t indirect_data_stride,
+ struct anv_address draw_id_addr,
+ uint32_t item_base,
+ uint32_t item_count,
+ struct anv_address count_addr,
+ uint32_t max_count,
+ bool indexed)
+{
+ struct anv_batch *batch = &cmd_buffer->generation_batch;
+
+ genX(cmd_buffer_emit_generate_draws_vertex)(cmd_buffer, item_count);
+
+ struct anv_state push_data_state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
+ sizeof(struct anv_generated_indirect_params),
+ ANV_UBO_ALIGNMENT);
+
+ struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+ const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
+
+ struct anv_generated_indirect_params *push_data = push_data_state.map;
+ *push_data = (struct anv_generated_indirect_params) {
+ .draw = {
+ .draw_id_addr = anv_address_physical(draw_id_addr),
+ .indirect_data_addr = anv_address_physical(indirect_data_addr),
+ .indirect_data_stride = indirect_data_stride,
+ /* Packed field: ANV_GENERATED_FLAG_* bits, then the MOCS of the
+ * indirect data shifted by 8, then the generated command stride in
+ * dwords shifted by 16.
+ */
+ .flags = (indexed ? ANV_GENERATED_FLAG_INDEXED : 0) |
+ (cmd_buffer->state.conditional_render_enabled ?
+ ANV_GENERATED_FLAG_PREDICATED : 0) |
+ ((vs_prog_data->uses_firstvertex ||
+ vs_prog_data->uses_baseinstance) ?
+ ANV_GENERATED_FLAG_BASE : 0) |
+ (vs_prog_data->uses_drawid ? ANV_GENERATED_FLAG_DRAWID : 0) |
+ (anv_mocs(cmd_buffer->device, indirect_data_addr.bo,
+ ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
+ ((generated_cmd_stride / 4) << 16),
+ .draw_base = item_base,
+ /* If count_addr is not NULL, we'll edit it through the command
+ * streamer.
+ */
+ .draw_count = anv_address_is_null(count_addr) ? max_count : 0,
+ .max_draw_count = max_count,
+ .instance_multiplier = pipeline->instance_multiplier,
+ },
+ .indirect_data_addr = anv_address_physical(indirect_data_addr),
+ .generated_cmds_addr = anv_address_physical(generated_cmds_addr),
+ .draw_ids_addr = anv_address_physical(draw_id_addr),
+ };
+
+ if (!anv_address_is_null(count_addr)) {
+ /* Copy the draw count into the push constants so that the generation
+ * gets the value straight away and doesn't even need to access memory.
+ */
+ struct mi_builder b;
+ mi_builder_init(&b, cmd_buffer->device->info, batch);
+ /* Destination is the draw.draw_count field inside the push data. */
+ mi_memcpy(&b,
+ anv_address_add((struct anv_address) {
+ .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
+ .offset = push_data_state.offset,
+ },
+ offsetof(struct anv_generated_indirect_params, draw.draw_count)),
+ count_addr, 4);
+
+ /* Make sure the memcpy landed for the generating draw call to pick up
+ * the value.
+ */
+ anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
+ pc.CommandStreamerStallEnable = true;
+ }
+ }
+
+ /* Only emit the data after the memcpy above. */
+ genX(cmd_buffer_emit_generated_push_data)(cmd_buffer, push_data_state);
+
+#if GFX_VER == 9
+ /* Why are the push constants not flushed without a binding table
+ * update??
+ */
+ anv_batch_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), btp) {
+ btp.PointertoPSBindingTable = cmd_buffer->generation_bt_state.offset;
+ }
+#endif
+
+ /* Draw the generation rectangle uploaded at the top of this function. */
+ anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {
+ prim.VertexAccessType = SEQUENTIAL;
+ prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
+ prim.VertexCountPerInstance = 3;
+ prim.InstanceCount = 1;
+ }
+
+ return push_data;
+}
+
+/* Start a generation sequence: jump from the main batch into the generation
+ * batch and emit the generation pipeline state.
+ *
+ * The address in the main batch right after the MI_BATCH_BUFFER_START is
+ * saved in cmd_buffer->generation_return_addr; it is the target the
+ * generation batch jumps back to when it is flushed.
+ */
+static void
+genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_buffer)
+{
+#if GFX_VER >= 12
+ /* Set PreParserDisable in the main batch before jumping; it is cleared
+ * again in genX(cmd_buffer_flush_generated_draws).
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
+ arb.PreParserDisableMask = true;
+ arb.PreParserDisable = true;
+ }
+#endif
+
+ /* NOTE(review): the VkResult returned here is ignored, unlike the call in
+ * genX(cmd_buffer_emit_indirect_generated_draws) - confirm this is
+ * intentional.
+ */
+ anv_batch_emit_ensure_space(&cmd_buffer->generation_batch, 4);
+
+ trace_intel_begin_generate_draws(&cmd_buffer->trace);
+
+ /* Jump to the current write position of the generation batch. */
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
+ bbs.AddressSpaceIndicator = ASI_PPGTT;
+ bbs.BatchBufferStartAddress =
+ anv_batch_current_address(&cmd_buffer->generation_batch);
+ }
+
+ /* Execution resumes in the main batch right after the jump above. */
+ cmd_buffer->generation_return_addr = anv_batch_current_address(&cmd_buffer->batch);
+
+ trace_intel_end_generate_draws(&cmd_buffer->trace);
+
+ genX(cmd_buffer_emit_generate_draws_pipeline)(cmd_buffer);
+
+}
+
+/* Return the address of a buffer with room for draw_id_count 32-bit draw ids,
+ * or ANV_NULL_ADDRESS when no such buffer is needed.
+ *
+ * A buffer is only allocated when GFX_VER < 11 and the bound vertex shader
+ * uses the draw id.
+ */
+static struct anv_address
+genX(cmd_buffer_get_draw_id_addr)(struct anv_cmd_buffer *cmd_buffer,
+                                  uint32_t draw_id_count)
+{
+#if GFX_VER >= 11
+   return ANV_NULL_ADDRESS;
+#else
+   const struct brw_vs_prog_data *vs_prog_data =
+      get_vs_prog_data(cmd_buffer->state.gfx.pipeline);
+
+   if (vs_prog_data->uses_drawid) {
+      /* One dword per draw, dword aligned. */
+      struct anv_state ids =
+         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 4 * draw_id_count, 4);
+
+      return anv_state_pool_state_address(
+         &cmd_buffer->device->dynamic_state_pool, ids);
+   }
+
+   return ANV_NULL_ADDRESS;
+#endif
+}
+
+/* Return the size in bytes reserved in the main batch for each generated
+ * draw.
+ */
+static uint32_t
+genX(cmd_buffer_get_generated_draw_stride)(struct anv_cmd_buffer *cmd_buffer)
+{
+   /* With the extended parameters in 3DPRIMITIVE on Gfx11+ we can emit
+    * everything. Prior to this, we need to emit a couple of
+    * VERTEX_BUFFER_STATE.
+    */
+#if GFX_VER >= 11
+   return 4 * GENX(3DPRIMITIVE_EXTENDED_length);
+#else
+   const struct brw_vs_prog_data *vs_prog_data =
+      get_vs_prog_data(cmd_buffer->state.gfx.pipeline);
+
+   const bool needs_base = vs_prog_data->uses_firstvertex ||
+                           vs_prog_data->uses_baseinstance;
+   const bool needs_drawid = vs_prog_data->uses_drawid;
+
+   /* The draw itself is always present. */
+   uint32_t stride = 4 * GENX(3DPRIMITIVE_length);
+
+   /* 3DSTATE_VERTEX_BUFFERS header, shared by both optional entries. */
+   if (needs_base || needs_drawid)
+      stride += 4;
+
+   if (needs_base)
+      stride += 4 * GENX(VERTEX_BUFFER_STATE_length);
+
+   if (needs_drawid)
+      stride += 4 * GENX(VERTEX_BUFFER_STATE_length);
+
+   return stride;
+#endif
+}
+
+/* Store the main batch's current address as draw.end_addr in every element
+ * of the list starting at params and linked through the prev field.
+ *
+ * A NULL params is accepted and results in no writes.
+ */
+static void
+genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer,
+                                          struct anv_generated_indirect_params *params)
+{
+   /* We don't know the end_addr until we have emitted all the generation
+    * draws. Go and edit the address of all the push parameters.
+    */
+   const struct anv_address batch_end =
+      anv_batch_current_address(&cmd_buffer->batch);
+   const uint64_t end_addr = anv_address_physical(batch_end);
+
+   for (struct anv_generated_indirect_params *p = params;
+        p != NULL; p = p->prev)
+      p->draw.end_addr = end_addr;
+}
+
+/* Record an indirect draw whose 3DPRIMITIVEs are generated on the GPU.
+ *
+ * Space for the draw commands is reserved in the main batch while the work
+ * that fills it in is emitted in the generation batch, in chunks of at most
+ * MAX_GENERATED_DRAW_COUNT draws.
+ *
+ * indirect_data_addr / indirect_data_stride: location and stride of the
+ * application's indirect parameters; the stride must be non-zero.
+ * count_addr: optional address of the draw count (null when unused).
+ * max_draw_count: maximum number of draws to generate.
+ * indexed: whether the draws are indexed.
+ *
+ * If reserving space in the main batch fails the function returns early.
+ */
+static void
+genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer,
+                                               struct anv_address indirect_data_addr,
+                                               uint32_t indirect_data_stride,
+                                               struct anv_address count_addr,
+                                               uint32_t max_draw_count,
+                                               bool indexed)
+{
+   /* A null return address means no generation batch is in flight yet. */
+   const bool start_generation_batch =
+      anv_address_is_null(cmd_buffer->generation_return_addr);
+
+   genX(flush_pipeline_select_3d)(cmd_buffer);
+
+   struct anv_address draw_id_addr =
+      genX(cmd_buffer_get_draw_id_addr)(cmd_buffer, max_draw_count);
+
+#if GFX_VER == 9
+   /* Mark the VB-0 as using the entire dynamic state pool area, but only for
+    * the draw call starting the generation batch. All the following ones will
+    * use the same area.
+    */
+   if (start_generation_batch) {
+      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, 0,
+                                                     (struct anv_address) {
+                                                        .offset = DYNAMIC_STATE_POOL_MIN_ADDRESS,
+                                                     },
+                                                     DYNAMIC_STATE_POOL_SIZE);
+   }
+
+   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
+
+   if (vs_prog_data->uses_baseinstance ||
+       vs_prog_data->uses_firstvertex) {
+      /* We're using the indirect buffer directly to source base instance &
+       * first vertex values. Mark the entire area as used.
+       */
+      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
+                                                     indirect_data_addr,
+                                                     indirect_data_stride * max_draw_count);
+   }
+
+   if (vs_prog_data->uses_drawid) {
+      /* Mark the whole draw id buffer as used. The draw id is sourced from
+       * ANV_DRAWID_VB_INDEX (see emit_draw_index() and
+       * update_dirty_vbs_for_gfx8_vb_flush()), so the range must be recorded
+       * for that binding and must not clobber the ANV_SVGS_VB_INDEX range
+       * recorded above.
+       */
+      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_DRAWID_VB_INDEX,
+                                                     draw_id_addr,
+                                                     sizeof(uint32_t) * max_draw_count);
+   }
+#endif
+
+   /* Apply the pipeline flush here so the indirect data is available for the
+    * generation shader.
+    */
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
+   if (start_generation_batch)
+      genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer);
+
+   /* In order to have the vertex fetch gather the data we need to have a non
+    * 0 stride. It's possible to have a 0 stride given by the application when
+    * draw_count is 1, but we need a correct value for the
+    * VERTEX_BUFFER_STATE::BufferPitch, so ensure the caller set this
+    * correctly :
+    *
+    *    Vulkan spec, vkCmdDrawIndirect:
+    *
+    *       "If drawCount is less than or equal to one, stride is ignored."
+    */
+   assert(indirect_data_stride > 0);
+
+   if (cmd_buffer->state.conditional_render_enabled)
+      genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
+
+   /* Emit the 3D state in the main batch. */
+   genX(cmd_buffer_flush_gfx_state)(cmd_buffer);
+
+   const uint32_t draw_cmd_stride =
+      genX(cmd_buffer_get_generated_draw_stride)(cmd_buffer);
+
+   /* Head of the list of push parameters (linked through prev) whose
+    * end_addr still has to be filled in once all chunks are emitted.
+    */
+   struct anv_generated_indirect_params *last_params = NULL;
+   uint32_t item_base = 0;
+   while (item_base < max_draw_count) {
+      const uint32_t item_count = MIN2(max_draw_count - item_base,
+                                       MAX_GENERATED_DRAW_COUNT);
+      const uint32_t draw_cmd_size = item_count * draw_cmd_stride;
+
+      /* Ensure we have enough contiguous space for all the draws so that the
+       * compute shader can edit all the 3DPRIMITIVEs from a single base
+       * address.
+       *
+       * TODO: we might have to split that if the amount of space is too large
+       *       (at 1Mb?).
+       */
+      VkResult result = anv_batch_emit_ensure_space(&cmd_buffer->batch,
+                                                    draw_cmd_size);
+      if (result != VK_SUCCESS)
+         return;
+
+      struct anv_generated_indirect_params *params =
+         genX(cmd_buffer_emit_generate_draws)(
+            cmd_buffer,
+            anv_batch_current_address(&cmd_buffer->batch),
+            draw_cmd_stride,
+            anv_address_add(indirect_data_addr,
+                            item_base * indirect_data_stride),
+            indirect_data_stride,
+            anv_address_add(draw_id_addr, 4 * item_base),
+            item_base,
+            item_count,
+            count_addr,
+            max_draw_count,
+            indexed);
+
+      /* Skip over the space the generation draw will fill in. */
+      anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);
+
+      item_base += item_count;
+
+      params->prev = last_params;
+      last_params = params;
+   }
+
+   genX(cmd_buffer_rewrite_forward_end_addr)(cmd_buffer, last_params);
+
+#if GFX_VER == 9
+   update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, indexed ? RANDOM : SEQUENTIAL);
+#endif
+}
+
+/* Terminate the generation batch, if one was started, by making it jump back
+ * to the main batch.
+ *
+ * Does nothing when cmd_buffer->generation_return_addr is null. Otherwise a
+ * flush/stall is emitted in the generation batch, followed (on GFX_VER >= 12)
+ * by an MI_ARB_CHECK clearing PreParserDisable, then an MI_BATCH_BUFFER_START
+ * to the saved return address. The return address is reset to null so the
+ * next generated indirect draw starts a new generation sequence.
+ */
+static void
+genX(cmd_buffer_flush_generated_draws)(struct anv_cmd_buffer *cmd_buffer)
+{
+ /* No return address setup means we don't have to do anything */
+ if (anv_address_is_null(cmd_buffer->generation_return_addr))
+ return;
+
+ struct anv_batch *batch = &cmd_buffer->generation_batch;
+
+ /* Wait for all the generation vertex shader to generate the commands. */
+ genX(emit_apply_pipe_flushes)(batch,
+ cmd_buffer->device,
+ _3D,
+#if GFX_VER == 9
+ ANV_PIPE_VF_CACHE_INVALIDATE_BIT |
+#endif
+ ANV_PIPE_DATA_CACHE_FLUSH_BIT |
+ ANV_PIPE_CS_STALL_BIT,
+ NULL /* query_bits */);
+
+#if GFX_VER >= 12
+ /* Undo the PreParserDisable set when the generation batch was started. */
+ anv_batch_emit(batch, GENX(MI_ARB_CHECK), arb) {
+ arb.PreParserDisableMask = true;
+ arb.PreParserDisable = false;
+ }
+#else
+ /* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter
+ * as the prefetch shouldn't follow the MI_BATCH_BUFFER_START.
+ */
+#endif
+
+ /* Return to the main batch. */
+ anv_batch_emit(batch, GENX(MI_BATCH_BUFFER_START), bbs) {
+ bbs.AddressSpaceIndicator = ASI_PPGTT;
+ bbs.BatchBufferStartAddress = cmd_buffer->generation_return_addr;
+ }
+
+ /* Mark the generation batch as no longer in flight. */
+ cmd_buffer->generation_return_addr = ANV_NULL_ADDRESS;
+}
+
+#endif /* GENX_CMD_GENERATED_INDIRECT_DRAW_H */
diff --git a/lib/mesa/src/intel/vulkan/genX_cmd_draw_helpers.h b/lib/mesa/src/intel/vulkan/genX_cmd_draw_helpers.h
new file mode 100644
index 000000000..8db6b5e75
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/genX_cmd_draw_helpers.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef GENX_CMD_DRAW_HELPERS_H
+#define GENX_CMD_DRAW_HELPERS_H
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include "anv_private.h"
+
+#if GFX_VER < 11
+/* Emit a 3DSTATE_VERTEX_BUFFERS packet with a single zero-pitch
+ * VERTEX_BUFFER_STATE binding addr/size to vertex buffer `index` in the main
+ * batch. A size of 0 programs a null vertex buffer.
+ *
+ * On GFX_VER == 9 the binding is also recorded for the VB flush tracking.
+ */
+static void
+emit_vertex_bo(struct anv_cmd_buffer *cmd_buffer,
+               struct anv_address addr,
+               uint32_t size, uint32_t index)
+{
+   struct anv_batch *batch = &cmd_buffer->batch;
+
+   uint32_t *dw = anv_batch_emitn(batch, 5, GENX(3DSTATE_VERTEX_BUFFERS));
+
+   struct GENX(VERTEX_BUFFER_STATE) vb_state = {
+      .VertexBufferIndex = index,
+      .AddressModifyEnable = true,
+      .BufferPitch = 0,
+      .MOCS = anv_mocs(cmd_buffer->device, addr.bo,
+                       ISL_SURF_USAGE_VERTEX_BUFFER_BIT),
+      .NullVertexBuffer = size == 0,
+      .BufferStartingAddress = addr,
+      .BufferSize = size
+   };
+
+   /* The state follows the packet's header dword. */
+   GENX(VERTEX_BUFFER_STATE_pack)(batch, dw + 1, &vb_state);
+
+#if GFX_VER == 9
+   genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer,
+                                                  index, addr, size);
+#endif
+}
+
+/* Bind addr as the ANV_SVGS_VB_INDEX vertex buffer: 8 bytes when addr has a
+ * BO, a null (size 0) vertex buffer otherwise.
+ */
+static void
+emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer,
+                             struct anv_address addr)
+{
+   const uint32_t size = addr.bo ? 8 : 0;
+
+   emit_vertex_bo(cmd_buffer, addr, size, ANV_SVGS_VB_INDEX);
+}
+
+/* Provide base_vertex and base_instance to the vertex fetcher through the
+ * ANV_SVGS_VB_INDEX vertex buffer.
+ *
+ * When both values are 0 no memory is allocated and a null address is bound
+ * instead.
+ */
+static void
+emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer,
+                          uint32_t base_vertex, uint32_t base_instance)
+{
+   struct anv_address addr = ANV_NULL_ADDRESS;
+
+   if (base_vertex != 0 || base_instance != 0) {
+      /* Two consecutive dwords: base vertex, then base instance. */
+      struct anv_state id_state =
+         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4);
+      uint32_t *values = id_state.map;
+
+      values[0] = base_vertex;
+      values[1] = base_instance;
+
+      addr = anv_state_pool_state_address(
+         &cmd_buffer->device->dynamic_state_pool, id_state);
+   }
+
+   emit_base_vertex_instance_bo(cmd_buffer, addr);
+}
+
+/* Store draw_index in a freshly allocated dword of dynamic state and bind it
+ * as the ANV_DRAWID_VB_INDEX vertex buffer.
+ */
+static void
+emit_draw_index(struct anv_cmd_buffer *cmd_buffer, uint32_t draw_index)
+{
+   struct anv_state state =
+      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 4, 4);
+   uint32_t *value = state.map;
+
+   value[0] = draw_index;
+
+   emit_vertex_bo(cmd_buffer,
+                  anv_state_pool_state_address(
+                     &cmd_buffer->device->dynamic_state_pool, state),
+                  4, ANV_DRAWID_VB_INDEX);
+}
+#endif /* GFX_VER < 11 */
+
+/* On GFX_VER == 9, compute the set of vertex buffers used by the current
+ * draw and forward it, with access_type, to
+ * genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush). No-op on other
+ * generations.
+ *
+ * The set is the valid vertex input bindings plus ANV_SVGS_VB_INDEX and
+ * ANV_DRAWID_VB_INDEX when the vertex shader uses the matching values.
+ */
+static void
+update_dirty_vbs_for_gfx8_vb_flush(struct anv_cmd_buffer *cmd_buffer,
+                                   uint32_t access_type)
+{
+#if GFX_VER == 9
+   const struct vk_dynamic_graphics_state *dyn =
+      &cmd_buffer->vk.dynamic_graphics_state;
+   const struct brw_vs_prog_data *vs_prog_data =
+      get_vs_prog_data(cmd_buffer->state.gfx.pipeline);
+
+   const bool uses_svgs = vs_prog_data->uses_firstvertex ||
+                          vs_prog_data->uses_baseinstance;
+
+   uint64_t vb_used = dyn->vi->bindings_valid;
+   vb_used |= uses_svgs ? (1ull << ANV_SVGS_VB_INDEX) : 0;
+   vb_used |= vs_prog_data->uses_drawid ? (1ull << ANV_DRAWID_VB_INDEX) : 0;
+
+   genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(cmd_buffer,
+                                                       access_type,
+                                                       vb_used);
+#endif
+}
+
+#if GFX_VER < 11
+/* Emit the vertex buffers carrying base vertex/instance and draw id when the
+ * vertex shader reads them, then apply pending pipe flushes if anything was
+ * emitted or if force_flush is set.
+ */
+ALWAYS_INLINE static void
+cmd_buffer_emit_vertex_constants_and_flush(struct anv_cmd_buffer *cmd_buffer,
+                                           const struct brw_vs_prog_data *vs_prog_data,
+                                           uint32_t base_vertex,
+                                           uint32_t base_instance,
+                                           uint32_t draw_id,
+                                           bool force_flush)
+{
+   const bool needs_base = vs_prog_data->uses_firstvertex ||
+                           vs_prog_data->uses_baseinstance;
+   const bool needs_draw_id = vs_prog_data->uses_drawid;
+
+   if (needs_base)
+      emit_base_vertex_instance(cmd_buffer, base_vertex, base_instance);
+
+   if (needs_draw_id)
+      emit_draw_index(cmd_buffer, draw_id);
+
+   /* Emitting draw index or vertex index BOs may result in needing
+    * additional VF cache flushes.
+    */
+   if (needs_base || needs_draw_id || force_flush)
+      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+}
+#endif
+
+#endif /* GENX_CMD_DRAW_HELPERS_H */
diff --git a/lib/mesa/src/intel/vulkan/genX_video.c b/lib/mesa/src/intel/vulkan/genX_video.c
new file mode 100644
index 000000000..0192d8703
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/genX_video.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright © 2021 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_private.h"
+
+#include "genxml/gen_macros.h"
+#include "genxml/genX_pack.h"
+
+/* vkCmdBeginVideoCodingKHR: remember the video session and session
+ * parameters from pBeginInfo on the command buffer for later video commands.
+ * Nothing is emitted to the batch.
+ */
+void
+genX(CmdBeginVideoCodingKHR)(VkCommandBuffer commandBuffer,
+                             const VkVideoBeginCodingInfoKHR *pBeginInfo)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_video_session, vid, pBeginInfo->videoSession);
+   ANV_FROM_HANDLE(anv_video_session_params, params,
+                   pBeginInfo->videoSessionParameters);
+
+   cmd_buffer->video.params = params;
+   cmd_buffer->video.vid = vid;
+}
+
+/* vkCmdControlVideoCodingKHR entry point.
+ *
+ * The body is empty: both arguments are ignored and nothing is recorded in
+ * the command buffer.
+ */
+void
+genX(CmdControlVideoCodingKHR)(VkCommandBuffer commandBuffer,
+ const VkVideoCodingControlInfoKHR *pCodingControlInfo)
+{
+
+}
+
+/* vkCmdEndVideoCodingKHR: drop the video session and session parameters
+ * recorded on the command buffer. pEndCodingInfo is not read.
+ */
+void
+genX(CmdEndVideoCodingKHR)(VkCommandBuffer commandBuffer,
+                           const VkVideoEndCodingInfoKHR *pEndCodingInfo)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   cmd_buffer->video.params = NULL;
+   cmd_buffer->video.vid = NULL;
+}
+
+static void
+anv_h264_decode_video(struct anv_cmd_buffer *cmd_buffer,
+ const VkVideoDecodeInfoKHR *frame_info)
+{
+ ANV_FROM_HANDLE(anv_buffer, src_buffer, frame_info->srcBuffer);
+ struct anv_video_session *vid = cmd_buffer->video.vid;
+ struct anv_video_session_params *params = cmd_buffer->video.params;
+ const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
+ vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);
+ const StdVideoH264SequenceParameterSet *sps = vk_video_find_h264_dec_std_sps(&params->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
+ const StdVideoH264PictureParameterSet *pps = vk_video_find_h264_dec_std_pps(&params->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), flush) {
+ flush.DWordLength = 2;
+ flush.VideoPipelineCacheInvalidate = 1;
+ };
+
+#if GFX_VER >= 12
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_FORCE_WAKEUP), wake) {
+ wake.MFXPowerWellControl = 1;
+ wake.MaskBits = 768;
+ }
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) {
+ mfx.MFXSyncControlFlag = 1;
+ }
+#endif
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_PIPE_MODE_SELECT), sel) {
+ sel.StandardSelect = SS_AVC;
+ sel.CodecSelect = Decode;
+ sel.DecoderShortFormatMode = ShortFormatDriverInterface;
+ sel.DecoderModeSelect = VLDMode; // Hardcoded
+
+ sel.PreDeblockingOutputEnable = 0;
+ sel.PostDeblockingOutputEnable = 1;
+ }
+
+#if GFX_VER >= 12
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) {
+ mfx.MFXSyncControlFlag = 1;
+ }
+#endif
+
+ const struct anv_image_view *iv = anv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
+ const struct anv_image *img = iv->image;
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_SURFACE_STATE), ss) {
+ ss.Width = img->vk.extent.width - 1;
+ ss.Height = img->vk.extent.height - 1;
+ ss.SurfaceFormat = PLANAR_420_8; // assert on this?
+ ss.InterleaveChroma = 1;
+ ss.SurfacePitch = img->planes[0].primary_surface.isl.row_pitch_B - 1;
+ ss.TiledSurface = img->planes[0].primary_surface.isl.tiling != ISL_TILING_LINEAR;
+ ss.TileWalk = TW_YMAJOR;
+
+ ss.YOffsetforUCb = ss.YOffsetforVCr =
+ img->planes[1].primary_surface.memory_range.offset / img->planes[0].primary_surface.isl.row_pitch_B;
+ }
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_PIPE_BUF_ADDR_STATE), buf) {
+ bool use_pre_deblock = false;
+ if (use_pre_deblock) {
+ buf.PreDeblockingDestinationAddress = anv_image_address(img,
+ &img->planes[0].primary_surface.memory_range);
+ } else {
+ buf.PostDeblockingDestinationAddress = anv_image_address(img,
+ &img->planes[0].primary_surface.memory_range);
+ }
+ buf.PreDeblockingDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, buf.PreDeblockingDestinationAddress.bo, 0),
+ };
+ buf.PostDeblockingDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, buf.PostDeblockingDestinationAddress.bo, 0),
+ };
+
+ buf.IntraRowStoreScratchBufferAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_INTRA_ROW_STORE].mem->bo, vid->vid_mem[ANV_VID_MEM_H264_INTRA_ROW_STORE].offset };
+ buf.IntraRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, buf.IntraRowStoreScratchBufferAddress.bo, 0),
+ };
+ buf.DeblockingFilterRowStoreScratchAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE].mem->bo, vid->vid_mem[ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE].offset };
+ buf.DeblockingFilterRowStoreScratchAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterRowStoreScratchAddress.bo, 0),
+ };
+ buf.MBStatusBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
+ };
+ buf.MBILDBStreamOutBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
+ };
+ buf.SecondMBILDBStreamOutBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
+ };
+ buf.ScaledReferenceSurfaceAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
+ };
+ buf.OriginalUncompressedPictureSourceAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
+ };
+ buf.StreamOutDataDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
+ };
+
+ struct anv_bo *ref_bo = NULL;
+ for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
+ const struct anv_image_view *ref_iv = anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
+ int idx = frame_info->pReferenceSlots[i].slotIndex;
+ buf.ReferencePictureAddress[idx] = anv_image_address(ref_iv->image,
+ &ref_iv->image->planes[0].primary_surface.memory_range);
+
+ if (i == 0) {
+ ref_bo = ref_iv->image->bindings[0].address.bo;
+ }
+ }
+ buf.ReferencePictureAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, ref_bo, 0),
+ };
+ }
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_IND_OBJ_BASE_ADDR_STATE), index_obj) {
+ index_obj.MFXIndirectBitstreamObjectAddress = anv_address_add(src_buffer->address,
+ frame_info->srcBufferOffset & ~4095);
+ index_obj.MFXIndirectBitstreamObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, src_buffer->address.bo, 0),
+ };
+ index_obj.MFXIndirectMVObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
+ };
+ index_obj.MFDIndirectITCOEFFObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
+ };
+ index_obj.MFDIndirectITDBLKObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
+ };
+ index_obj.MFCIndirectPAKBSEObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
+ };
+ }
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_BSP_BUF_BASE_ADDR_STATE), bsp) {
+ bsp.BSDMPCRowStoreScratchBufferAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].mem->bo,
+ vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].offset };
+
+ bsp.BSDMPCRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, bsp.BSDMPCRowStoreScratchBufferAddress.bo, 0),
+ };
+ bsp.MPRRowStoreScratchBufferAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_MPR_ROW_SCRATCH].mem->bo,
+ vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].offset };
+
+ bsp.MPRRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, bsp.MPRRowStoreScratchBufferAddress.bo, 0),
+ };
+ bsp.BitplaneReadBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
+ };
+ }
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_DPB_STATE), avc_dpb) {
+ for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
+ const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
+ vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
+ const StdVideoDecodeH264ReferenceInfo *ref_info = dpb_slot->pStdReferenceInfo;
+ int idx = frame_info->pReferenceSlots[i].slotIndex;
+ avc_dpb.NonExistingFrame[idx] = ref_info->flags.is_non_existing;
+ avc_dpb.LongTermFrame[idx] = ref_info->flags.used_for_long_term_reference;
+ if (!ref_info->flags.top_field_flag && !ref_info->flags.bottom_field_flag)
+ avc_dpb.UsedforReference[idx] = 3;
+ else
+ avc_dpb.UsedforReference[idx] = ref_info->flags.top_field_flag | (ref_info->flags.bottom_field_flag << 1);
+ avc_dpb.LTSTFrameNumberList[idx] = ref_info->FrameNum;
+ }
+ }
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_PICID_STATE), picid) {
+ picid.PictureIDRemappingDisable = true;
+ }
+
+ uint32_t pic_height = sps->pic_height_in_map_units_minus1 + 1;
+ if (!sps->flags.frame_mbs_only_flag)
+ pic_height *= 2;
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_AVC_IMG_STATE), avc_img) {
+ avc_img.FrameWidth = sps->pic_width_in_mbs_minus1;
+ avc_img.FrameHeight = pic_height - 1;
+ avc_img.FrameSize = (sps->pic_width_in_mbs_minus1 + 1) * pic_height;
+
+ if (!h264_pic_info->pStdPictureInfo->flags.field_pic_flag)
+ avc_img.ImageStructure = FramePicture;
+ else if (h264_pic_info->pStdPictureInfo->flags.bottom_field_flag)
+ avc_img.ImageStructure = BottomFieldPicture;
+ else
+ avc_img.ImageStructure = TopFieldPicture;
+
+ avc_img.WeightedBiPredictionIDC = pps->weighted_bipred_idc;
+ avc_img.WeightedPredictionEnable = pps->flags.weighted_pred_flag;
+ avc_img.FirstChromaQPOffset = pps->chroma_qp_index_offset;
+ avc_img.SecondChromaQPOffset = pps->second_chroma_qp_index_offset;
+ avc_img.FieldPicture = h264_pic_info->pStdPictureInfo->flags.field_pic_flag;
+ avc_img.MBAFFMode = (sps->flags.mb_adaptive_frame_field_flag &&
+ !h264_pic_info->pStdPictureInfo->flags.field_pic_flag);
+ avc_img.FrameMBOnly = sps->flags.frame_mbs_only_flag;
+ avc_img._8x8IDCTTransformMode = pps->flags.transform_8x8_mode_flag;
+ avc_img.Direct8x8Inference = sps->flags.direct_8x8_inference_flag;
+ avc_img.ConstrainedIntraPrediction = pps->flags.constrained_intra_pred_flag;
+ avc_img.NonReferencePicture = !h264_pic_info->pStdPictureInfo->flags.is_reference;
+ avc_img.EntropyCodingSyncEnable = pps->flags.entropy_coding_mode_flag;
+ avc_img.ChromaFormatIDC = sps->chroma_format_idc;
+ avc_img.TrellisQuantizationChromaDisable = true;
+ avc_img.NumberofReferenceFrames = frame_info->referenceSlotCount;
+ avc_img.NumberofActiveReferencePicturesfromL0 = pps->num_ref_idx_l0_default_active_minus1 + 1;
+ avc_img.NumberofActiveReferencePicturesfromL1 = pps->num_ref_idx_l1_default_active_minus1 + 1;
+ avc_img.InitialQPValue = pps->pic_init_qp_minus26;
+ avc_img.PicOrderPresent = pps->flags.bottom_field_pic_order_in_frame_present_flag;
+ avc_img.DeltaPicOrderAlwaysZero = sps->flags.delta_pic_order_always_zero_flag;
+ avc_img.PicOrderCountType = sps->pic_order_cnt_type;
+ avc_img.DeblockingFilterControlPresent = pps->flags.deblocking_filter_control_present_flag;
+ avc_img.RedundantPicCountPresent = pps->flags.redundant_pic_cnt_present_flag;
+ avc_img.Log2MaxFrameNumber = sps->log2_max_frame_num_minus4;
+ avc_img.Log2MaxPicOrderCountLSB = sps->log2_max_pic_order_cnt_lsb_minus4;
+ avc_img.CurrentPictureFrameNumber = h264_pic_info->pStdPictureInfo->frame_num;
+ }
+
+ if (pps->flags.pic_scaling_matrix_present_flag) {
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
+ qm.DWordLength = 16;
+ qm.AVC = AVC_4x4_Intra_MATRIX;
+ for (unsigned m = 0; m < 3; m++)
+ for (unsigned q = 0; q < 16; q++)
+ qm.ForwardQuantizerMatrix[m * 16 + q] = pps->pScalingLists->ScalingList4x4[m][q];
+ }
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
+ qm.DWordLength = 16;
+ qm.AVC = AVC_4x4_Inter_MATRIX;
+ for (unsigned m = 0; m < 3; m++)
+ for (unsigned q = 0; q < 16; q++)
+ qm.ForwardQuantizerMatrix[m * 16 + q] = pps->pScalingLists->ScalingList4x4[m + 3][q];
+ }
+ if (pps->flags.transform_8x8_mode_flag) {
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
+ qm.DWordLength = 16;
+ qm.AVC = AVC_8x8_Intra_MATRIX;
+ for (unsigned q = 0; q < 64; q++)
+ qm.ForwardQuantizerMatrix[q] = pps->pScalingLists->ScalingList8x8[0][q];
+ }
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
+ qm.DWordLength = 16;
+ qm.AVC = AVC_8x8_Inter_MATRIX;
+ for (unsigned q = 0; q < 64; q++)
+ qm.ForwardQuantizerMatrix[q] = pps->pScalingLists->ScalingList8x8[3][q];
+ }
+ }
+ } else if (sps->flags.seq_scaling_matrix_present_flag) {
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
+ qm.DWordLength = 16;
+ qm.AVC = AVC_4x4_Intra_MATRIX;
+ for (unsigned m = 0; m < 3; m++)
+ for (unsigned q = 0; q < 16; q++)
+ qm.ForwardQuantizerMatrix[m * 16 + q] = sps->pScalingLists->ScalingList4x4[m][q];
+ }
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
+ qm.DWordLength = 16;
+ qm.AVC = AVC_4x4_Inter_MATRIX;
+ for (unsigned m = 0; m < 3; m++)
+ for (unsigned q = 0; q < 16; q++)
+ qm.ForwardQuantizerMatrix[m * 16 + q] = sps->pScalingLists->ScalingList4x4[m + 3][q];
+ }
+ if (pps->flags.transform_8x8_mode_flag) {
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
+ qm.DWordLength = 16;
+ qm.AVC = AVC_8x8_Intra_MATRIX;
+ for (unsigned q = 0; q < 64; q++)
+ qm.ForwardQuantizerMatrix[q] = sps->pScalingLists->ScalingList8x8[0][q];
+ }
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
+ qm.DWordLength = 16;
+ qm.AVC = AVC_8x8_Inter_MATRIX;
+ for (unsigned q = 0; q < 64; q++)
+ qm.ForwardQuantizerMatrix[q] = sps->pScalingLists->ScalingList8x8[3][q];
+ }
+ }
+ } else {
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
+ qm.DWordLength = 16;
+ qm.AVC = AVC_4x4_Intra_MATRIX;
+ for (unsigned q = 0; q < 3 * 16; q++)
+ qm.ForwardQuantizerMatrix[q] = 0x10;
+ }
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
+ qm.DWordLength = 16;
+ qm.AVC = AVC_4x4_Inter_MATRIX;
+ for (unsigned q = 0; q < 3 * 16; q++)
+ qm.ForwardQuantizerMatrix[q] = 0x10;
+ }
+ if (pps->flags.transform_8x8_mode_flag) {
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
+ qm.DWordLength = 16;
+ qm.AVC = AVC_8x8_Intra_MATRIX;
+ for (unsigned q = 0; q < 64; q++)
+ qm.ForwardQuantizerMatrix[q] = 0x10;
+ }
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
+ qm.DWordLength = 16;
+ qm.AVC = AVC_8x8_Inter_MATRIX;
+ for (unsigned q = 0; q < 64; q++)
+ qm.ForwardQuantizerMatrix[q] = 0x10;
+ }
+ }
+ }
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFX_AVC_DIRECTMODE_STATE), avc_directmode) {
+ /* bind reference frame DMV */
+ struct anv_bo *dmv_bo = NULL;
+ for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
+ int idx = frame_info->pReferenceSlots[i].slotIndex;
+ const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
+ vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
+ const struct anv_image_view *ref_iv = anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
+ const StdVideoDecodeH264ReferenceInfo *ref_info = dpb_slot->pStdReferenceInfo;
+ avc_directmode.DirectMVBufferAddress[idx] = anv_image_address(ref_iv->image,
+ &ref_iv->image->vid_dmv_top_surface);
+ if (i == 0) {
+ dmv_bo = ref_iv->image->bindings[0].address.bo;
+ }
+ avc_directmode.POCList[2 * idx] = ref_info->PicOrderCnt[0];
+ avc_directmode.POCList[2 * idx + 1] = ref_info->PicOrderCnt[1];
+ }
+ avc_directmode.DirectMVBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, dmv_bo, 0),
+ };
+
+ avc_directmode.DirectMVBufferWriteAddress = anv_image_address(img,
+ &img->vid_dmv_top_surface);
+ avc_directmode.DirectMVBufferWriteAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
+ .MOCS = anv_mocs(cmd_buffer->device, img->bindings[0].address.bo, 0),
+ };
+ avc_directmode.POCList[32] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
+ avc_directmode.POCList[33] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];
+ }
+
+ uint32_t buffer_offset = frame_info->srcBufferOffset & 4095;
+#define HEADER_OFFSET 3
+ for (unsigned s = 0; s < h264_pic_info->sliceCount; s++) {
+ bool last_slice = s == (h264_pic_info->sliceCount - 1);
+ uint32_t current_offset = h264_pic_info->pSliceOffsets[s];
+ uint32_t this_end;
+ if (!last_slice) {
+ uint32_t next_offset = h264_pic_info->pSliceOffsets[s + 1];
+ uint32_t next_end = h264_pic_info->pSliceOffsets[s + 2];
+ if (s == h264_pic_info->sliceCount - 2)
+ next_end = frame_info->srcBufferRange;
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_SLICEADDR), sliceaddr) {
+ sliceaddr.IndirectBSDDataLength = next_end - next_offset - HEADER_OFFSET;
+ /* start decoding after the 3-byte header. */
+ sliceaddr.IndirectBSDDataStartAddress = buffer_offset + next_offset + HEADER_OFFSET;
+ };
+ this_end = next_offset;
+ } else
+ this_end = frame_info->srcBufferRange;
+ anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_BSD_OBJECT), avc_bsd) {
+ avc_bsd.IndirectBSDDataLength = this_end - current_offset - HEADER_OFFSET;
+ /* start decoding after the 3-byte header. */
+ avc_bsd.IndirectBSDDataStartAddress = buffer_offset + current_offset + HEADER_OFFSET;
+ avc_bsd.InlineData.LastSlice = last_slice;
+ avc_bsd.InlineData.FixPrevMBSkipped = 1;
+ avc_bsd.InlineData.IntraPredictionErrorControl = 1;
+ avc_bsd.InlineData.Intra8x84x4PredictionErrorConcealmentControl = 1;
+ avc_bsd.InlineData.ISliceConcealmentMode = 1;
+ };
+ }
+}
+
+void
+genX(CmdDecodeVideoKHR)(VkCommandBuffer commandBuffer,
+                        const VkVideoDecodeInfoKHR *frame_info)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   /* Dispatch on the codec operation stored in cmd_buffer->video.vid; H.264
+    * decode is the only operation with a handler here.
+    */
+   if (cmd_buffer->video.vid->vk.op ==
+       VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) {
+      anv_h264_decode_video(cmd_buffer, frame_info);
+      return;
+   }
+
+   /* Any other operation trips the assertion (when assertions are enabled). */
+   assert(0);
+}
+
+#ifdef VK_ENABLE_BETA_EXTENSIONS
+void
+genX(CmdEncodeVideoKHR)(VkCommandBuffer commandBuffer,
+ const VkVideoEncodeInfoKHR *pEncodeInfo)
+{ /* Empty body: both arguments are ignored and nothing is emitted. NOTE(review): presumably a placeholder until encode is implemented -- confirm. */
+}
+#endif
diff --git a/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c b/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c
index 34337c21f..f9c13954d 100644
--- a/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c
+++ b/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c
@@ -55,7 +55,9 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
pc.RenderTargetCacheFlushEnable = true;
#if GFX_VER >= 12
pc.TileCacheFlushEnable = true;
+#endif
+#if INTEL_NEEDS_WA_1409600907
/* Wa_1409600907: "PIPE_CONTROL with Depth Stall Enable bit must
* be set with any PIPE_CONTROL with Depth Flush Enable bit set.
*/
@@ -209,6 +211,24 @@ want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer,
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}
+static UNUSED bool
+geom_or_tess_prim_id_used(struct anv_graphics_pipeline *pipeline)
+{
+   /* Program data of each stage, left NULL when the pipeline lacks it. */
+   const struct brw_tcs_prog_data *tcs = NULL;
+   const struct brw_tes_prog_data *tes = NULL;
+   const struct brw_gs_prog_data *gs = NULL;
+
+   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL))
+      tcs = get_tcs_prog_data(pipeline);
+   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
+      tes = get_tes_prog_data(pipeline);
+   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
+      gs = get_gs_prog_data(pipeline);
+
+   /* True as soon as any present stage has include_primitive_id set. */
+   if (tcs && tcs->include_primitive_id)
+      return true;
+   if (tes && tes->include_primitive_id)
+      return true;
+   return gs && gs->include_primitive_id;
+}
+
static void
genX(cmd_emit_te)(struct anv_cmd_buffer *cmd_buffer)
{
@@ -230,7 +250,21 @@ genX(cmd_emit_te)(struct anv_cmd_buffer *cmd_buffer)
te.MaximumTessellationFactorOdd = 63.0;
te.MaximumTessellationFactorNotOdd = 64.0;
#if GFX_VERx10 >= 125
- te.TessellationDistributionMode = TEDMODE_RR_FREE;
+ if (intel_needs_workaround(cmd_buffer->device->info, 22012785325))
+ te.TessellationDistributionMode = TEDMODE_RR_STRICT;
+ else
+ te.TessellationDistributionMode = TEDMODE_RR_FREE;
+
+ if (intel_needs_workaround(cmd_buffer->device->info, 14015297576)) {
+ /* Wa_14015297576:
+ *
+ * Disable Tessellation Distribution when primitive Id is enabled.
+ */
+ if (pipeline->primitive_id_override ||
+ geom_or_tess_prim_id_used(pipeline))
+ te.TessellationDistributionMode = TEDMODE_OFF;
+ }
+
te.TessellationDistributionLevel = TEDLEVEL_PATCH;
/* 64_TRIANGLES */
te.SmallPatchThreshold = 3;
@@ -315,7 +349,8 @@ genX(emit_shading_rate)(struct anv_batch *batch,
const struct vk_fragment_shading_rate_state *fsr)
{
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
- const bool cps_enable = wm_prog_data && wm_prog_data->per_coarse_pixel_dispatch;
+ const bool cps_enable = wm_prog_data &&
+ brw_wm_prog_data_is_coarse(wm_prog_data, 0);
#if GFX_VER == 11
anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) {
@@ -392,6 +427,36 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
&cmd_buffer->vk.dynamic_graphics_state;
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI)) {
+ const uint32_t ve_count =
+ pipeline->vs_input_elements + pipeline->svgs_count;
+ const uint32_t num_dwords = 1 + 2 * MAX2(1, ve_count);
+ uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
+ GENX(3DSTATE_VERTEX_ELEMENTS));
+
+ if (p) {
+ if (ve_count == 0) {
+ memcpy(p + 1, cmd_buffer->device->empty_vs_input,
+ sizeof(cmd_buffer->device->empty_vs_input));
+ } else if (ve_count == pipeline->vertex_input_elems) {
+ /* MESA_VK_DYNAMIC_VI is not dynamic for this pipeline, so
+ * everything is in pipeline->vertex_input_data and we can just
+ * memcpy
+ */
+ memcpy(p + 1, pipeline->vertex_input_data, 4 * 2 * ve_count);
+ } else {
+ /* Use dyn->vi to emit the dynamic VERTEX_ELEMENT_STATE input. */
+ genX(emit_vertex_input)(&cmd_buffer->batch, p + 1,
+ pipeline, dyn->vi);
+ /* Then append the VERTEX_ELEMENT_STATE for the draw parameters */
+ memcpy(p + 1 + 2 * pipeline->vs_input_elements,
+ pipeline->vertex_input_data,
+ 4 * 2 * pipeline->vertex_input_elems);
+ }
+ }
+ }
+
+ if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) {
genX(cmd_emit_te)(cmd_buffer);
}
@@ -650,8 +715,12 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
#endif
if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS))
- genX(emit_sample_pattern)(&cmd_buffer->batch, dyn->ms.sample_locations);
+ (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS) ||
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE))) {
+ genX(emit_sample_pattern)(&cmd_buffer->batch,
+ dyn->ms.sample_locations_enable ?
+ dyn->ms.sample_locations : NULL);
+ }
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) ||
diff --git a/lib/mesa/src/intel/vulkan/grl/genX_grl.h b/lib/mesa/src/intel/vulkan/grl/genX_grl.h
index 6617e210b..57aefa72d 100644
--- a/lib/mesa/src/intel/vulkan/grl/genX_grl.h
+++ b/lib/mesa/src/intel/vulkan/grl/genX_grl.h
@@ -24,13 +24,15 @@
#ifndef ANV_GRL_H
#define ANV_GRL_H
+#include "grl/grl_cl_kernel.h"
+#include "genxml/gen_macros.h"
+
#ifdef __cplusplus
extern "C" {
#endif
-#include "anv_private.h"
-#include "grl/grl_cl_kernel.h"
-#include "genxml/gen_macros.h"
+struct anv_cmd_buffer;
+struct anv_kernel_arg;
void
genX(grl_dispatch)(struct anv_cmd_buffer *cmd_buffer,
@@ -42,6 +44,9 @@ genX(grl_dispatch)(struct anv_cmd_buffer *cmd_buffer,
void
genX(grl_load_rt_uuid)(uint8_t *out_uuid);
+uint32_t
+genX(grl_max_scratch_size)(void);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/lib/mesa/src/intel/vulkan/grl/genX_grl_dispatch.c b/lib/mesa/src/intel/vulkan/grl/genX_grl_dispatch.c
index a320e6faa..eff7c4074 100644
--- a/lib/mesa/src/intel/vulkan/grl/genX_grl_dispatch.c
+++ b/lib/mesa/src/intel/vulkan/grl/genX_grl_dispatch.c
@@ -21,6 +21,7 @@
* IN THE SOFTWARE.
*/
+#include "anv_private.h"
#include "genX_grl.h"
static struct anv_shader_bin *
@@ -89,3 +90,19 @@ genX(grl_dispatch)(struct anv_cmd_buffer *cmd_buffer,
genX(cmd_buffer_dispatch_kernel)(cmd_buffer, &ak, global_size,
arg_count, args);
}
+
+uint32_t
+genX(grl_max_scratch_size)(void)
+{
+   /* Largest total_scratch found across all GRL CL kernels (0 if none). */
+   uint32_t largest = 0;
+
+   for (uint32_t id = 0; id < GRL_CL_KERNEL_MAX; id++) {
+      struct brw_kernel kernel;
+      genX(grl_get_cl_kernel)(&kernel, id);
+
+      if (kernel.prog_data.base.total_scratch > largest)
+         largest = kernel.prog_data.base.total_scratch;
+   }
+
+   return largest;
+}
diff --git a/lib/mesa/src/intel/vulkan/grl/genX_grl_uuid.cpp b/lib/mesa/src/intel/vulkan/grl/genX_grl_uuid.cpp
index 9f4335892..cf6b425fe 100644
--- a/lib/mesa/src/intel/vulkan/grl/genX_grl_uuid.cpp
+++ b/lib/mesa/src/intel/vulkan/grl/genX_grl_uuid.cpp
@@ -24,15 +24,16 @@
#include <assert.h>
#include <string.h>
+#include "genX_grl.h"
#include "include/GRLGen12.h"
#include "vulkan/vulkan_core.h"
extern "C" void
-gfx125_grl_load_rt_uuid(uint8_t *out_uuid);
+genX(grl_load_rt_uuid)(uint8_t *out_uuid);
extern "C" void
-gfx125_grl_load_rt_uuid(uint8_t *out_uuid)
+genX(grl_load_rt_uuid)(uint8_t *out_uuid)
{
assert(sizeof(GRL::RTAS::GEN12::BVH_MAGIC) == VK_UUID_SIZE);
memcpy(out_uuid, GRL::RTAS::GEN12::BVH_MAGIC, VK_UUID_SIZE);
diff --git a/lib/mesa/src/intel/vulkan/grl/grl_cl_kernel_gen.py b/lib/mesa/src/intel/vulkan/grl/grl_cl_kernel_gen.py
index 4b0b8babd..c7efeff53 100644
--- a/lib/mesa/src/intel/vulkan/grl/grl_cl_kernel_gen.py
+++ b/lib/mesa/src/intel/vulkan/grl/grl_cl_kernel_gen.py
@@ -36,13 +36,13 @@ TEMPLATE_H = Template(COPYRIGHT + """
#ifndef GRL_CL_KERNEL_H
#define GRL_CL_KERNEL_H
+#include "genxml/gen_macros.h"
+#include "compiler/brw_kernel.h"
+
#ifdef __cplusplus
extern "C" {
#endif
-#include "genxml/gen_macros.h"
-#include "compiler/brw_kernel.h"
-
enum grl_cl_kernel {
% for k in kernels:
GRL_CL_KERNEL_${k.upper()},
@@ -50,7 +50,7 @@ enum grl_cl_kernel {
GRL_CL_KERNEL_MAX,
};
-const char *grl_cl_kernel_name(enum grl_cl_kernel kernel);
+const char *genX(grl_cl_kernel_name)(enum grl_cl_kernel kernel);
const char *genX(grl_get_cl_kernel_sha1)(enum grl_cl_kernel id);
@@ -73,7 +73,7 @@ TEMPLATE_C = Template(COPYRIGHT + """
% endfor
const char *
-grl_cl_kernel_name(enum grl_cl_kernel kernel)
+genX(grl_cl_kernel_name)(enum grl_cl_kernel kernel)
{
switch (kernel) {
% for k in kernels:
diff --git a/lib/mesa/src/intel/vulkan/grl/grl_metakernel_gen.py b/lib/mesa/src/intel/vulkan/grl/grl_metakernel_gen.py
index 029ecf30f..0a14113a3 100644
--- a/lib/mesa/src/intel/vulkan/grl/grl_metakernel_gen.py
+++ b/lib/mesa/src/intel/vulkan/grl/grl_metakernel_gen.py
@@ -866,7 +866,7 @@ C_PROLOGUE = COPYRIGHT + '''
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
-#include "genxml/gen_rt_pack.h"
+#include "genxml/genX_rt_pack.h"
/* We reserve :
* - GPR 14 for secondary command buffer returns
diff --git a/lib/mesa/src/intel/vulkan/grl/meson.build b/lib/mesa/src/intel/vulkan/grl/meson.build
index 979414c07..c0056b349 100644
--- a/lib/mesa/src/intel/vulkan/grl/meson.build
+++ b/lib/mesa/src/intel/vulkan/grl/meson.build
@@ -142,6 +142,7 @@ foreach t : [['125', 'gfx125', 'dg2']]
# without modifying grl source code, remove
# if fixed there
],
+ env: ['MESA_SHADER_CACHE_DISABLE=true'],
depends : [prog_intel_clc]
)
endforeach
@@ -165,11 +166,11 @@ foreach t : [['125', 'gfx125', 'dg2']]
inc_intel,
],
c_args : [
- no_override_init_args, c_sse2_args,
+ no_override_init_args, sse2_args,
'-DGFX_VERx10=@0@'.format(verX10),
],
cpp_args : [
- no_override_init_args, c_sse2_args,
+ sse2_args,
'-DGFX_VERx10=@0@'.format(verX10),
],
dependencies : [
@@ -196,7 +197,6 @@ libgrl = static_library(
],
link_whole : [grl_genX_libs],
dependencies : [libgrl_deps, idep_anv_headers],
- install : true,
)
idep_grl = declare_dependency(
link_with : libgrl,
diff --git a/lib/mesa/src/intel/vulkan/i915/anv_batch_chain.c b/lib/mesa/src/intel/vulkan/i915/anv_batch_chain.c
new file mode 100644
index 000000000..ff6e7d1ae
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/i915/anv_batch_chain.c
@@ -0,0 +1,813 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "i915/anv_batch_chain.h"
+#include "anv_private.h"
+#include "anv_measure.h"
+
+#include "perf/intel_perf.h"
+#include "util/u_debug.h"
+
+#include "drm-uapi/i915_drm.h"
+
+struct anv_execbuf { /* Working state used to build one i915 execbuffer2 submission. */
+ struct drm_i915_gem_execbuffer2 execbuf; /* ioctl argument; filled in by the setup_*_execbuf() helpers */
+
+ struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
+
+ struct drm_i915_gem_exec_object2 * objects; /* validation list; entry i describes bos[i] */
+ uint32_t bo_count; /* entries in use in objects[] and bos[] */
+ uint32_t bo_array_length; /* allocated capacity of objects[] and bos[] */
+ struct anv_bo ** bos; /* BO behind each objects[] entry, same indexing */
+
+ uint32_t syncobj_count; /* entries in use in syncobjs[] (and syncobj_values[] when allocated) */
+ uint32_t syncobj_array_length; /* allocated capacity of syncobjs[] / syncobj_values[] */
+ struct drm_i915_gem_exec_fence * syncobjs; /* handle + wait/signal flags per entry */
+ uint64_t * syncobj_values; /* timeline points; NULL until a non-zero value is first added */
+
+ uint32_t cmd_buffer_count;
+ struct anv_query_pool *perf_query_pool;
+
+ const VkAllocationCallbacks * alloc; /* allocator used for all arrays above */
+ VkSystemAllocationScope alloc_scope; /* scope passed with every allocation */
+
+ int perf_query_pass;
+};
+
+static void
+anv_execbuf_finish(struct anv_execbuf *exec)
+{
+   /* The heap arrays owned by the execbuf; each one goes back to the
+    * allocator stored in exec->alloc.
+    */
+   void *const owned[] = {
+      exec->syncobjs,
+      exec->syncobj_values,
+      exec->objects,
+      exec->bos,
+   };
+
+   for (uint32_t i = 0; i < sizeof(owned) / sizeof(owned[0]); i++)
+      vk_free(exec->alloc, owned[i]);
+}
+
+static void
+anv_execbuf_add_ext(struct anv_execbuf *exec,
+                    uint32_t ext_name,
+                    struct i915_user_extension *ext)
+{
+   /* The extension chain is rooted in execbuf.cliprects_ptr and linked
+    * through next_extension; a zero link marks its end.
+    */
+   __u64 *link = &exec->execbuf.cliprects_ptr;
+
+   while (*link != 0) {
+      struct i915_user_extension *node =
+         (struct i915_user_extension *)(uintptr_t)*link;
+      link = (__u64 *) &node->next_extension;
+   }
+
+   /* Name the new extension and hook it onto the tail. */
+   ext->name = ext_name;
+   *link = (uintptr_t) ext;
+
+   exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
+}
+
+static VkResult /* Forward declaration: anv_execbuf_add_bo() and this function call each other. */
+anv_execbuf_add_bo_bitset(struct anv_device *device,
+ struct anv_execbuf *exec,
+ uint32_t dep_words,
+ BITSET_WORD *deps,
+ uint32_t extra_flags);
+
+static VkResult /* Make sure bo has an entry in exec's object list, then add the BOs named by relocs (if non-NULL). Returns VK_SUCCESS or an out-of-host-memory error. */
+anv_execbuf_add_bo(struct anv_device *device,
+ struct anv_execbuf *exec,
+ struct anv_bo *bo,
+ struct anv_reloc_list *relocs,
+ uint32_t extra_flags)
+{
+ struct drm_i915_gem_exec_object2 *obj = NULL;
+ /* bo->exec_obj_index is only trusted when the slot it names really holds this bo. */
+ if (bo->exec_obj_index < exec->bo_count &&
+ exec->bos[bo->exec_obj_index] == bo)
+ obj = &exec->objects[bo->exec_obj_index];
+
+ if (obj == NULL) {
+ /* We've never seen this one before. Add it to the list and assign
+ * an id that we can use later.
+ */
+ if (exec->bo_count >= exec->bo_array_length) {
+ uint32_t new_len = exec->objects ? exec->bo_array_length * 2 : 64; /* first allocation holds 64 entries, later ones double */
+
+ struct drm_i915_gem_exec_object2 *new_objects =
+ vk_realloc(exec->alloc, exec->objects,
+ new_len * sizeof(*new_objects), 8, exec->alloc_scope);
+ if (new_objects == NULL)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ exec->objects = new_objects;
+ /* objects is stored before bos is grown, so the new pointer is kept even if the next realloc fails. */
+ struct anv_bo **new_bos =
+ vk_realloc(exec->alloc, exec->bos, new_len * sizeof(*new_bos), 8,
+ exec->alloc_scope);
+ if (new_bos == NULL)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ exec->bos = new_bos;
+ exec->bo_array_length = new_len; /* capacity is only bumped once both arrays have grown */
+ }
+
+ assert(exec->bo_count < exec->bo_array_length);
+ /* Claim the next free slot and remember its index on the BO itself. */
+ bo->exec_obj_index = exec->bo_count++;
+ obj = &exec->objects[bo->exec_obj_index];
+ exec->bos[bo->exec_obj_index] = bo;
+
+ obj->handle = bo->gem_handle;
+ obj->relocation_count = 0;
+ obj->relocs_ptr = 0;
+ obj->alignment = 0;
+ obj->offset = bo->offset;
+ obj->flags = bo->flags | extra_flags;
+ obj->rsvd1 = 0;
+ obj->rsvd2 = 0;
+ }
+ /* Applies to already-listed objects too: a write request sets WRITE and clears ASYNC. */
+ if (extra_flags & EXEC_OBJECT_WRITE) {
+ obj->flags |= EXEC_OBJECT_WRITE;
+ obj->flags &= ~EXEC_OBJECT_ASYNC;
+ }
+
+ if (relocs != NULL) { /* also add every BO recorded in the relocation list's dependency bitset */
+ return anv_execbuf_add_bo_bitset(device, exec, relocs->dep_words,
+ relocs->deps, extra_flags);
+ }
+
+ return VK_SUCCESS;
+}
+
+/* Add to the execbuf every BO whose GEM handle has its bit set in deps.
+ *
+ * deps holds dep_words bitset words; bit i of word w stands for GEM handle
+ * w * BITSET_WORDBITS + i. Stops at and returns the first error.
+ */
+static VkResult
+anv_execbuf_add_bo_bitset(struct anv_device *device,
+                          struct anv_execbuf *exec,
+                          uint32_t dep_words,
+                          BITSET_WORD *deps,
+                          uint32_t extra_flags)
+{
+   for (uint32_t word = 0; word < dep_words; word++) {
+      /* u_bit_scan() is handed this local copy, never deps itself. */
+      for (BITSET_WORD pending = deps[word]; pending != 0; ) {
+         const int bit = u_bit_scan(&pending);
+         const uint32_t gem_handle = word * BITSET_WORDBITS + bit;
+
+         struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
+         assert(bo->refcount > 0);
+
+         const VkResult result =
+            anv_execbuf_add_bo(device, exec, bo, NULL, extra_flags);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+static VkResult /* Append one syncobj entry (handle, flags, timeline value) to exec, growing the arrays as needed. Returns VK_SUCCESS or an out-of-host-memory error. */
+anv_execbuf_add_syncobj(struct anv_device *device,
+ struct anv_execbuf *exec,
+ uint32_t syncobj,
+ uint32_t flags,
+ uint64_t timeline_value)
+{
+ if (exec->syncobj_count >= exec->syncobj_array_length) {
+ uint32_t new_len = MAX2(exec->syncobj_array_length * 2, 16); /* capacity doubles, with a floor of 16 entries */
+
+ struct drm_i915_gem_exec_fence *new_syncobjs =
+ vk_realloc(exec->alloc, exec->syncobjs,
+ new_len * sizeof(*new_syncobjs), 8, exec->alloc_scope);
+ if (new_syncobjs == NULL)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ exec->syncobjs = new_syncobjs;
+ /* The value array is created lazily below, so it is only grown here if it already exists. */
+ if (exec->syncobj_values) {
+ uint64_t *new_syncobj_values =
+ vk_realloc(exec->alloc, exec->syncobj_values,
+ new_len * sizeof(*new_syncobj_values), 8,
+ exec->alloc_scope);
+ if (new_syncobj_values == NULL)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ exec->syncobj_values = new_syncobj_values;
+ }
+
+ exec->syncobj_array_length = new_len;
+ }
+ /* First non-zero timeline value: allocate the value array at the current capacity (vk_zalloc presumably zero-fills, leaving earlier entries at 0 -- confirm). */
+ if (timeline_value && !exec->syncobj_values) {
+ exec->syncobj_values =
+ vk_zalloc(exec->alloc, exec->syncobj_array_length *
+ sizeof(*exec->syncobj_values),
+ 8, exec->alloc_scope);
+ if (!exec->syncobj_values)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+ /* Write the new entry at index syncobj_count; the value is recorded only when a value array exists. */
+ exec->syncobjs[exec->syncobj_count] = (struct drm_i915_gem_exec_fence) {
+ .handle = syncobj,
+ .flags = flags,
+ };
+ if (exec->syncobj_values)
+ exec->syncobj_values[exec->syncobj_count] = timeline_value;
+
+ exec->syncobj_count++;
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+anv_execbuf_add_sync(struct anv_device *device,
+                     struct anv_execbuf *execbuf,
+                     struct vk_sync *sync,
+                     bool is_signal,
+                     uint64_t value)
+{
+   const bool is_timeline = (sync->flags & VK_SYNC_IS_TIMELINE) != 0;
+
+   /* Signalling a timeline with 0 is illegal since 0 can never exceed the
+    * current value, and waiting for 0 is trivially satisfied because every
+    * uint64_t is >= 0. Either way there is nothing to add.
+    */
+   if (is_timeline && value == 0)
+      return VK_SUCCESS;
+
+   /* BO-backed sync: the BO itself goes on the object list. */
+   if (vk_sync_is_anv_bo_sync(sync)) {
+      struct anv_bo_sync *bo_sync =
+         container_of(sync, struct anv_bo_sync, sync);
+
+      assert(is_signal == (bo_sync->state == ANV_BO_SYNC_STATE_RESET));
+
+      const uint32_t bo_flags = is_signal ? EXEC_OBJECT_WRITE : 0;
+      return anv_execbuf_add_bo(device, execbuf, bo_sync->bo, NULL,
+                                bo_flags);
+   }
+
+   if (!vk_sync_type_is_drm_syncobj(sync->type)) {
+      unreachable("Invalid sync type");
+   }
+
+   /* DRM syncobj: binary syncobjs always use point 0. */
+   struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync);
+   const uint64_t point = is_timeline ? value : 0;
+   const uint32_t fence_flags = is_signal ? I915_EXEC_FENCE_SIGNAL :
+                                            I915_EXEC_FENCE_WAIT;
+
+   return anv_execbuf_add_syncobj(device, execbuf, syncobj->syncobj,
+                                  fence_flags, point);
+}
+
+static VkResult
+setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
+                             struct anv_cmd_buffer *cmd_buffer)
+{
+   struct anv_device *device = cmd_buffer->device;
+
+   /* BOs referenced through the surface relocation dependency bitset. */
+   VkResult result =
+      anv_execbuf_add_bo_bitset(device, execbuf,
+                                cmd_buffer->surface_relocs.dep_words,
+                                cmd_buffer->surface_relocs.deps, 0);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Every batch BO seen by this command buffer, together with the BOs
+    * its relocation list depends on.
+    */
+   struct anv_batch_bo **seen;
+   u_vector_foreach(seen, &cmd_buffer->seen_bbos) {
+      struct anv_batch_bo *batch_bo = *seen;
+
+      result = anv_execbuf_add_bo(device, execbuf, batch_bo->bo,
+                                  &batch_bo->relocs, 0);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   /* BOs tracked in the command buffer's dynamic_bos vector. */
+   struct anv_bo **dynamic;
+   u_vector_foreach(dynamic, &cmd_buffer->dynamic_bos) {
+      result = anv_execbuf_add_bo(device, execbuf, *dynamic, NULL, 0);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   return VK_SUCCESS;
+}
+
+static VkResult /* Add every BO of pool's block pool to execbuf; stops at and returns the first error. */
+pin_state_pool(struct anv_device *device,
+ struct anv_execbuf *execbuf,
+ struct anv_state_pool *pool)
+{
+ anv_block_pool_foreach_bo(bo, &pool->block_pool) { /* bo is not declared in this function, so the macro presumably declares it */
+ VkResult result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0); /* no relocation list, no extra flags */
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
+                              struct anv_queue *queue,
+                              struct anv_cmd_buffer **cmd_buffers,
+                              uint32_t num_cmd_buffers)
+{
+   struct anv_device *device = queue->device;
+   VkResult result;
+
+   /* Patch the tail of each command buffer so that, where possible, they
+    * are all chained together.
+    */
+   anv_cmd_buffer_chain_command_buffers(cmd_buffers, num_cmd_buffers);
+
+   for (uint32_t c = 0; c < num_cmd_buffers; c++) {
+      struct anv_cmd_buffer *cmd_buffer = cmd_buffers[c];
+
+      anv_measure_submit(cmd_buffer);
+      result = setup_execbuf_for_cmd_buffer(execbuf, cmd_buffer);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   /* Softpin: every BO of the device-wide state pools goes on the list. */
+   struct anv_state_pool *const global_pools[] = {
+      &device->scratch_surface_state_pool,
+      &device->bindless_surface_state_pool,
+      &device->internal_surface_state_pool,
+      &device->dynamic_state_pool,
+      &device->general_state_pool,
+      &device->instruction_state_pool,
+      &device->binding_table_pool,
+   };
+
+   for (uint32_t p = 0;
+        p < sizeof(global_pools) / sizeof(global_pools[0]); p++) {
+      result = pin_state_pool(device, execbuf, global_pools[p]);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   /* With VK_EXT_descriptor_indexing, bindings can be updated after the
+    * fact and cannot be tracked, so every user-allocated memory object is
+    * added.
+    */
+   list_for_each_entry(struct anv_device_memory, mem,
+                       &device->memory_objects, link) {
+      result = anv_execbuf_add_bo(device, execbuf, mem->bo, NULL, 0);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   /* For the same reason, add the private BO of every image that owns
+    * one.
+    */
+   list_for_each_entry(struct anv_image, image,
+                       &device->image_private_objects, link) {
+      struct anv_bo *private_bo =
+         image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE].address.bo;
+      result = anv_execbuf_add_bo(device, execbuf, private_bo, NULL, 0);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   struct anv_bo *batch_bo =
+      list_first_entry(&cmd_buffers[0]->batch_bos,
+                       struct anv_batch_bo, link)->bo;
+
+   /* The kernel wants the batch to execute as the last entry of the
+    * validation list, so swap the first batch BO of the chain with whatever
+    * currently sits in the final slot.
+    */
+   const uint32_t last_idx = execbuf->bo_count - 1;
+   const uint32_t idx = batch_bo->exec_obj_index;
+
+   if (idx != last_idx) {
+      assert(execbuf->bos[idx] == batch_bo);
+
+      const struct drm_i915_gem_exec_object2 batch_obj =
+         execbuf->objects[idx];
+      struct anv_bo *displaced = execbuf->bos[last_idx];
+
+      execbuf->objects[idx] = execbuf->objects[last_idx];
+      execbuf->bos[idx] = displaced;
+      displaced->exec_obj_index = idx;
+
+      execbuf->objects[last_idx] = batch_obj;
+      execbuf->bos[last_idx] = batch_bo;
+      batch_bo->exec_obj_index = last_idx;
+   }
+
+#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
+   if (device->physical->memory.need_clflush)
+      anv_cmd_buffer_clflush(cmd_buffers, num_cmd_buffers);
+#endif
+
+   execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
+      .buffers_ptr = (uintptr_t) execbuf->objects,
+      .buffer_count = execbuf->bo_count,
+      .batch_start_offset = 0,
+      /* The batch length gets filled in later, when chaining batches. */
+      .batch_len = 0,
+      .cliprects_ptr = 0,
+      .num_cliprects = 0,
+      .DR1 = 0,
+      .DR4 = 0,
+      .flags = I915_EXEC_NO_RELOC |
+               I915_EXEC_HANDLE_LUT |
+               queue->exec_flags,
+      .rsvd1 = device->context_id,
+      .rsvd2 = 0,
+   };
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue)
+{
+   struct anv_device *device = queue->device;
+
+   /* The device's trivial batch is the only object on the list. */
+   const VkResult result =
+      anv_execbuf_add_bo(device, execbuf, device->trivial_batch_bo, NULL, 0);
+   if (result != VK_SUCCESS)
+      return result;
+
+   execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
+      .buffers_ptr = (uintptr_t) execbuf->objects,
+      .buffer_count = execbuf->bo_count,
+      .batch_start_offset = 0,
+      /* GFX7_MI_BATCH_BUFFER_END and NOOP */
+      .batch_len = 8,
+      .flags = I915_EXEC_HANDLE_LUT |
+               queue->exec_flags |
+               I915_EXEC_NO_RELOC,
+      .rsvd1 = device->context_id,
+      .rsvd2 = 0,
+   };
+
+   return VK_SUCCESS;
+}
+
+/* Prepare @execbuf to run the batch buffer owned by a utrace submission.
+ *
+ * Adds the workaround BO, the submission's batch BO (with its relocation
+ * list) and the submission's sync object as a signal, then makes sure the
+ * batch BO is the last entry of the validation list before filling the
+ * execbuf descriptor.
+ *
+ * Returns the first error reported while adding BOs/syncs, VK_SUCCESS
+ * otherwise.
+ */
+static VkResult
+setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
+                     struct anv_utrace_submit *submit)
+{
+   struct anv_device *dev = queue->device;
+   VkResult res;
+
+   /* Always add the workaround BO as it includes a driver identifier for the
+    * error_state.
+    */
+   res = anv_execbuf_add_bo(dev, execbuf, dev->workaround_bo, NULL, 0);
+   if (res != VK_SUCCESS)
+      return res;
+
+   res = anv_execbuf_add_bo(dev, execbuf, submit->batch_bo,
+                            &submit->relocs, 0);
+   if (res != VK_SUCCESS)
+      return res;
+
+   res = anv_execbuf_add_sync(dev, execbuf, submit->sync,
+                              true /* is_signal */, 0 /* value */);
+   if (res != VK_SUCCESS)
+      return res;
+
+   /* Swap the batch BO with whatever currently sits in the last slot, and
+    * keep each BO's exec_obj_index in sync with its new position.
+    */
+   struct anv_bo *batch_bo = submit->batch_bo;
+   const uint32_t tail_idx = execbuf->bo_count - 1;
+   const uint32_t batch_idx = batch_bo->exec_obj_index;
+
+   if (batch_idx != tail_idx) {
+      assert(execbuf->bos[batch_idx] == batch_bo);
+
+      const struct drm_i915_gem_exec_object2 batch_obj =
+         execbuf->objects[batch_idx];
+      struct anv_bo *displaced_bo = execbuf->bos[tail_idx];
+
+      execbuf->objects[batch_idx] = execbuf->objects[tail_idx];
+      execbuf->bos[batch_idx] = displaced_bo;
+      displaced_bo->exec_obj_index = batch_idx;
+
+      execbuf->objects[tail_idx] = batch_obj;
+      execbuf->bos[tail_idx] = batch_bo;
+      batch_bo->exec_obj_index = tail_idx;
+   }
+
+#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
+   if (dev->physical->memory.need_clflush)
+      intel_flush_range(submit->batch_bo->map, submit->batch_bo->size);
+#endif
+
+   execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
+      .buffers_ptr = (uintptr_t) execbuf->objects,
+      .buffer_count = execbuf->bo_count,
+      .batch_start_offset = 0,
+      .batch_len = submit->batch.next - submit->batch.start,
+      /* With I915_EXEC_FENCE_ARRAY the cliprects fields carry the syncobj
+       * array.
+       */
+      .cliprects_ptr = (uintptr_t)execbuf->syncobjs,
+      .num_cliprects = execbuf->syncobj_count,
+      .flags = I915_EXEC_NO_RELOC |
+               I915_EXEC_HANDLE_LUT |
+               I915_EXEC_FENCE_ARRAY |
+               queue->exec_flags,
+      .rsvd1 = dev->context_id,
+      .rsvd2 = 0,
+   };
+
+   return VK_SUCCESS;
+}
+
+/* Issue the execbuffer2 ioctl described by @execbuf on the device fd.
+ *
+ * The _WR variant of the ioctl is selected when I915_EXEC_FENCE_OUT is set
+ * in the flags. Returns the value of intel_ioctl().
+ */
+static int
+anv_gem_execbuffer(struct anv_device *device,
+                   struct drm_i915_gem_execbuffer2 *execbuf)
+{
+   const bool wants_out_fence = (execbuf->flags & I915_EXEC_FENCE_OUT) != 0;
+
+   if (!wants_out_fence)
+      return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
+
+   return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf);
+}
+
+/* Build and submit the execbuf for a utrace submission that owns a batch BO.
+ *
+ * The ioctl is skipped when the device info reports no_hw. A failing ioctl
+ * marks the queue as lost. The execbuf is always released before returning.
+ */
+static VkResult
+anv_queue_exec_utrace_locked(struct anv_queue *queue,
+                             struct anv_utrace_submit *submit)
+{
+   assert(submit->batch_bo);
+
+   struct anv_device *device = queue->device;
+   struct anv_execbuf execbuf = {
+      .alloc = &device->vk.alloc,
+      .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
+   };
+
+   VkResult result = setup_utrace_execbuf(&execbuf, queue, submit);
+   if (result == VK_SUCCESS) {
+      int ret = 0;
+      if (!queue->device->info->no_hw)
+         ret = anv_gem_execbuffer(queue->device, &execbuf.execbuf);
+
+      if (ret)
+         result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
+   }
+
+   anv_execbuf_finish(&execbuf);
+
+   return result;
+}
+
+/* Dump a description of @execbuf to stderr.
+ *
+ * The first line gives the batch offset/length and the summed BO sizes
+ * (overall and VRAM-only); it is followed by one line per BO in the
+ * validation list.
+ */
+static void
+anv_i915_debug_submit(const struct anv_execbuf *execbuf)
+{
+   uint32_t aperture_kb = 0;
+   uint32_t vram_only_kb = 0;
+
+   for (uint32_t b = 0; b < execbuf->bo_count; b++) {
+      const struct anv_bo *bo = execbuf->bos[b];
+
+      aperture_kb += bo->size / 1024;
+      if (bo->vram_only)
+         vram_only_kb += bo->size / 1024;
+   }
+
+   fprintf(stderr, "Batch offset=0x%x len=0x%x on queue 0 (aperture: %.1fMb, %.1fMb VRAM only)\n",
+           execbuf->execbuf.batch_start_offset, execbuf->execbuf.batch_len,
+           (float)aperture_kb / 1024.0f,
+           (float)vram_only_kb / 1024.0f);
+
+   for (uint32_t b = 0; b < execbuf->bo_count; b++) {
+      const struct anv_bo *bo = execbuf->bos[b];
+      /* The per-BO range and size include the _ccs_size part. */
+      const uint64_t full_size = bo->size + bo->_ccs_size;
+      const uint64_t first_addr = bo->offset;
+      const uint64_t last_addr = bo->offset + full_size - 1;
+
+      fprintf(stderr, " BO: addr=0x%016"PRIx64"-0x%016"PRIx64" size=%7"PRIu64
+              "KB handle=%05u capture=%u vram_only=%u name=%s\n",
+              first_addr, last_addr, full_size / 1024, bo->gem_handle,
+              (bo->flags & EXEC_OBJECT_CAPTURE) != 0,
+              bo->vram_only, bo->name);
+   }
+}
+
+/* Submit a set of command buffers (or an empty batch) on @queue through the
+ * i915 execbuffer2 ioctl.
+ *
+ * Steps, in order:
+ *  - flush the utrace trace points of the command buffers,
+ *  - add the workaround BO, the wait/signal syncs and the optional
+ *    queue->sync to the execbuf,
+ *  - build the execbuf for the command buffers, or for the trivial batch
+ *    when cmd_buffer_count is 0,
+ *  - when a perf query pool is given, reconfigure i915-perf if needed and
+ *    submit the pool's per-pass preamble batch first,
+ *  - submit the main execbuf and, if queue->sync is set, wait for it,
+ *  - finally, on success, submit the utrace batch if it has its own BO.
+ *
+ * Returns VK_SUCCESS or the first setup error; ioctl failures mark the
+ * queue (or device, for the perf config) as lost.
+ *
+ * NOTE(review): the "_locked" suffix presumably means the caller already
+ * holds the relevant lock -- confirm against the callers.
+ */
+VkResult
+i915_queue_exec_locked(struct anv_queue *queue,
+ uint32_t wait_count,
+ const struct vk_sync_wait *waits,
+ uint32_t cmd_buffer_count,
+ struct anv_cmd_buffer **cmd_buffers,
+ uint32_t signal_count,
+ const struct vk_sync_signal *signals,
+ struct anv_query_pool *perf_query_pool,
+ uint32_t perf_query_pass)
+{
+ struct anv_device *device = queue->device;
+ struct anv_utrace_submit *utrace_submit = NULL;
+ struct anv_execbuf execbuf = {
+ .alloc = &queue->device->vk.alloc,
+ .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
+ .perf_query_pass = perf_query_pass,
+ };
+
+ /* Flush the trace points first, they need to be moved */
+ VkResult result =
+ anv_device_utrace_flush_cmd_buffers(queue,
+ cmd_buffer_count,
+ cmd_buffers,
+ &utrace_submit);
+ if (result != VK_SUCCESS)
+ goto error;
+
+ if (utrace_submit && !utrace_submit->batch_bo) {
+ result = anv_execbuf_add_sync(device, &execbuf,
+ utrace_submit->sync,
+ true /* is_signal */,
+ 0);
+ if (result != VK_SUCCESS)
+ goto error;
+
+ /* The utrace submission doesn't have its own batch buffer: its sync is
+ * signaled by this execbuf, so there is no separate utrace batch to
+ * submit at the end of this function.
+ */
+ utrace_submit = NULL;
+ }
+
+ /* Always add the workaround BO as it includes a driver identifier for the
+ * error_state.
+ */
+ result =
+ anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
+ if (result != VK_SUCCESS)
+ goto error;
+
+ /* Syncs this submission has to wait on. */
+ for (uint32_t i = 0; i < wait_count; i++) {
+ result = anv_execbuf_add_sync(device, &execbuf,
+ waits[i].sync,
+ false /* is_signal */,
+ waits[i].wait_value);
+ if (result != VK_SUCCESS)
+ goto error;
+ }
+
+ /* Syncs this submission signals. */
+ for (uint32_t i = 0; i < signal_count; i++) {
+ result = anv_execbuf_add_sync(device, &execbuf,
+ signals[i].sync,
+ true /* is_signal */,
+ signals[i].signal_value);
+ if (result != VK_SUCCESS)
+ goto error;
+ }
+
+ /* When the queue has its own sync, signal it too; it is waited on right
+ * after the submission below.
+ */
+ if (queue->sync) {
+ result = anv_execbuf_add_sync(device, &execbuf,
+ queue->sync,
+ true /* is_signal */,
+ 0 /* signal_value */);
+ if (result != VK_SUCCESS)
+ goto error;
+ }
+
+ if (cmd_buffer_count) {
+ result = setup_execbuf_for_cmd_buffers(&execbuf, queue,
+ cmd_buffers,
+ cmd_buffer_count);
+ } else {
+ result = setup_empty_execbuf(&execbuf, queue);
+ }
+
+ if (result != VK_SUCCESS)
+ goto error;
+
+ /* NOTE(review): perf_query_pass is a uint32_t, so "perf_query_pass >= 0"
+ * is always true; the condition reduces to
+ * "perf_query_pool && cmd_buffer_count".
+ */
+ const bool has_perf_query =
+ perf_query_pool && perf_query_pass >= 0 && cmd_buffer_count;
+
+ if (INTEL_DEBUG(DEBUG_SUBMIT))
+ anv_i915_debug_submit(&execbuf);
+
+ anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers,
+ perf_query_pool, perf_query_pass);
+
+ /* Hand the syncobjs to the kernel: through the timeline-fences extension
+ * when a values array exists, otherwise through the fence array passed in
+ * the cliprects fields.
+ */
+ if (execbuf.syncobj_values) {
+ execbuf.timeline_fences.fence_count = execbuf.syncobj_count;
+ execbuf.timeline_fences.handles_ptr = (uintptr_t)execbuf.syncobjs;
+ execbuf.timeline_fences.values_ptr = (uintptr_t)execbuf.syncobj_values;
+ anv_execbuf_add_ext(&execbuf,
+ DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
+ &execbuf.timeline_fences.base);
+ } else if (execbuf.syncobjs) {
+ execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
+ execbuf.execbuf.num_cliprects = execbuf.syncobj_count;
+ execbuf.execbuf.cliprects_ptr = (uintptr_t)execbuf.syncobjs;
+ }
+
+ if (has_perf_query) {
+ assert(perf_query_pass < perf_query_pool->n_passes);
+ struct intel_perf_query_info *query_info =
+ perf_query_pool->pass_query[perf_query_pass];
+
+ /* Some performance queries just use the pipeline statistic HW, no need
+ * for OA in that case, so no need to reconfigure.
+ */
+ if (!INTEL_DEBUG(DEBUG_NO_OACONFIG) &&
+ (query_info->kind == INTEL_PERF_QUERY_TYPE_OA ||
+ query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) {
+ int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
+ (void *)(uintptr_t) query_info->oa_metrics_set_id);
+ /* NOTE(review): a failure here marks the device lost but the
+ * function still goes on to submit the batches below.
+ */
+ if (ret < 0) {
+ result = vk_device_set_lost(&device->vk,
+ "i915-perf config failed: %s",
+ strerror(errno));
+ }
+ }
+
+ /* Submit the query pool BO as a batch of its own, starting at the
+ * preamble offset of the selected pass, ahead of the main execbuf.
+ */
+ struct anv_bo *pass_batch_bo = perf_query_pool->bo;
+
+ struct drm_i915_gem_exec_object2 query_pass_object = {
+ .handle = pass_batch_bo->gem_handle,
+ .offset = pass_batch_bo->offset,
+ .flags = pass_batch_bo->flags,
+ };
+ struct drm_i915_gem_execbuffer2 query_pass_execbuf = {
+ .buffers_ptr = (uintptr_t) &query_pass_object,
+ .buffer_count = 1,
+ .batch_start_offset = khr_perf_query_preamble_offset(perf_query_pool,
+ perf_query_pass),
+ .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags,
+ .rsvd1 = device->context_id,
+ };
+
+ /* The ioctl is skipped when the device info reports no_hw. */
+ int ret = queue->device->info->no_hw ? 0 :
+ anv_gem_execbuffer(queue->device, &query_pass_execbuf);
+ if (ret)
+ result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
+ }
+
+ /* Main submission; on failure dump the execbuf and mark the queue lost. */
+ int ret = queue->device->info->no_hw ? 0 :
+ anv_gem_execbuffer(queue->device, &execbuf.execbuf);
+ if (ret) {
+ anv_i915_debug_submit(&execbuf);
+ result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
+ }
+
+ /* With a queue sync, block until the submission has completed. */
+ if (result == VK_SUCCESS && queue->sync) {
+ result = vk_sync_wait(&device->vk, queue->sync, 0,
+ VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
+ if (result != VK_SUCCESS)
+ result = vk_queue_set_lost(&queue->vk, "sync wait failed");
+ }
+
+ error:
+ anv_execbuf_finish(&execbuf);
+
+ /* Only reached with a non-NULL utrace_submit when it owns a batch BO. */
+ if (result == VK_SUCCESS && utrace_submit)
+ result = anv_queue_exec_utrace_locked(queue, utrace_submit);
+
+ return result;
+}
+
+/* Submit @batch_bo alone on @queue and wait for it.
+ *
+ * @batch_bo_size is passed to the kernel as the batch length. A failing
+ * execbuffer ioctl or a failing anv_device_wait() marks the device as lost.
+ * The temporary execbuf is released on every path.
+ */
+VkResult
+i915_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
+                          uint32_t batch_bo_size)
+{
+   struct anv_device *device = queue->device;
+   struct anv_execbuf execbuf = {
+      .alloc = &queue->device->vk.alloc,
+      .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
+   };
+
+   VkResult result = anv_execbuf_add_bo(device, &execbuf, batch_bo, NULL, 0);
+   if (result == VK_SUCCESS) {
+      execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
+         .buffers_ptr = (uintptr_t) execbuf.objects,
+         .buffer_count = execbuf.bo_count,
+         .batch_start_offset = 0,
+         .batch_len = batch_bo_size,
+         .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC,
+         .rsvd1 = device->context_id,
+         .rsvd2 = 0,
+      };
+
+      if (anv_gem_execbuffer(device, &execbuf.execbuf)) {
+         result = vk_device_set_lost(&device->vk, "anv_gem_execbuffer failed: %m");
+      } else {
+         result = anv_device_wait(device, batch_bo, INT64_MAX);
+         if (result != VK_SUCCESS) {
+            result = vk_device_set_lost(&device->vk,
+                                        "anv_device_wait failed: %m");
+         }
+      }
+   }
+
+   anv_execbuf_finish(&execbuf);
+   return result;
+}
+
+/* Backend entry point for submitting a utrace batch.
+ *
+ * The submission must own a batch BO; the work is delegated to
+ * anv_queue_exec_utrace_locked().
+ */
+VkResult
+i915_queue_exec_trace(struct anv_queue *queue,
+                      struct anv_utrace_submit *submit)
+{
+   assert(submit->batch_bo != NULL);
+
+   const VkResult result = anv_queue_exec_utrace_locked(queue, submit);
+   return result;
+}
diff --git a/lib/mesa/src/intel/vulkan/i915/anv_batch_chain.h b/lib/mesa/src/intel/vulkan/i915/anv_batch_chain.h
new file mode 100644
index 000000000..5e3f14fd0
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/i915/anv_batch_chain.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include "vulkan/vulkan_core.h"
+
+#include "vk_sync.h"
+
+struct anv_queue;
+struct anv_bo;
+struct anv_cmd_buffer;
+struct anv_query_pool;
+struct anv_utrace_submit;
+
+/* Submit the batch buffer owned by a utrace submission on @queue.
+ * @submit->batch_bo must be non-NULL.
+ */
+VkResult
+i915_queue_exec_trace(struct anv_queue *queue,
+ struct anv_utrace_submit *submit);
+/* Submit @batch_bo alone on @queue, using @batch_bo_size as the batch
+ * length, and wait for it to complete.
+ */
+VkResult
+i915_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
+ uint32_t batch_bo_size);
+/* Submit @cmd_buffer_count command buffers (or an empty batch when the count
+ * is 0) on @queue, waiting on @waits and signaling @signals.
+ * @perf_query_pool may be NULL; when set, @perf_query_pass selects the pass
+ * to run.
+ */
+VkResult
+i915_queue_exec_locked(struct anv_queue *queue,
+ uint32_t wait_count,
+ const struct vk_sync_wait *waits,
+ uint32_t cmd_buffer_count,
+ struct anv_cmd_buffer **cmd_buffers,
+ uint32_t signal_count,
+ const struct vk_sync_signal *signals,
+ struct anv_query_pool *perf_query_pool,
+ uint32_t perf_query_pass);
diff --git a/lib/mesa/src/intel/vulkan/i915/anv_device.c b/lib/mesa/src/intel/vulkan/i915/anv_device.c
new file mode 100644
index 000000000..ada5a85e8
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/i915/anv_device.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "i915/anv_device.h"
+#include "anv_private.h"
+
+#include "common/intel_defines.h"
+
+#include "drm-uapi/i915_drm.h"
+
+/* Translate a Vulkan global queue priority into the matching
+ * INTEL_CONTEXT_*_PRIORITY value. Any other input is unreachable.
+ */
+static int
+vk_priority_to_i915(VkQueueGlobalPriorityKHR priority)
+{
+   if (priority == VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR)
+      return INTEL_CONTEXT_LOW_PRIORITY;
+
+   if (priority == VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
+      return INTEL_CONTEXT_MEDIUM_PRIORITY;
+
+   if (priority == VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR)
+      return INTEL_CONTEXT_HIGH_PRIORITY;
+
+   if (priority == VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR)
+      return INTEL_CONTEXT_REALTIME_PRIORITY;
+
+   unreachable("Invalid priority");
+}
+
+/* Set a parameter on a kernel context.
+ *
+ * For I915_CONTEXT_PARAM_PRIORITY, @value is a VkQueueGlobalPriorityKHR and
+ * is converted with vk_priority_to_i915() first.
+ *
+ * Returns 0 on success, -errno on failure.
+ */
+static int
+anv_gem_set_context_param(int fd, uint32_t context, uint32_t param, uint64_t value)
+{
+   const bool is_priority = (param == I915_CONTEXT_PARAM_PRIORITY);
+   if (is_priority)
+      value = vk_priority_to_i915(value);
+
+   if (intel_gem_set_context_param(fd, context, param, value))
+      return 0;
+
+   return -errno;
+}
+
+/* Probe whether @priority can be set, by trying to set it on context 0. */
+static bool
+anv_gem_has_context_priority(int fd, VkQueueGlobalPriorityKHR priority)
+{
+   const int err = anv_gem_set_context_param(fd, 0,
+                                             I915_CONTEXT_PARAM_PRIORITY,
+                                             priority);
+   return err == 0;
+}
+
+/* Query the i915 kernel driver for the features the physical device needs.
+ *
+ * Mandatory features (gem wait, execbuf2, WC mmap on non-LLC parts, softpin,
+ * fence array) fail with VK_ERROR_INITIALIZATION_FAILED when missing.
+ * Optional ones (exec async, exec capture, timeline fences) and the highest
+ * usable context priority are recorded in @device.
+ */
+VkResult
+anv_i915_physical_device_get_parameters(struct anv_physical_device *device)
+{
+   const int fd = device->local_fd;
+   int val;
+
+   if (!intel_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT, &val) || !val) {
+      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
+                       "kernel missing gem wait");
+   }
+
+   if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2, &val) || !val) {
+      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
+                       "kernel missing execbuf2");
+   }
+
+   /* The mmap version is only checked on devices without LLC. */
+   if (!device->info.has_llc &&
+       (!intel_gem_get_param(fd, I915_PARAM_MMAP_VERSION, &val) || val < 1)) {
+      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
+                       "kernel missing wc mmap");
+   }
+
+   if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &val) || !val) {
+      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
+                       "kernel missing softpin");
+   }
+
+   if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY, &val) || !val) {
+      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
+                       "kernel missing syncobj support");
+   }
+
+   if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC, &val))
+      device->has_exec_async = val;
+   if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE, &val))
+      device->has_exec_capture = val;
+
+   /* Walk the priorities from lowest to highest and remember the last one
+    * the kernel accepts; stop at the first rejection. LOW is the fallback
+    * when none is accepted.
+    */
+   static const VkQueueGlobalPriorityKHR ascending_priorities[] = {
+      VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
+      VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
+      VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
+      VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
+   };
+   device->max_context_priority = VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR;
+   for (unsigned p = 0; p < ARRAY_SIZE(ascending_priorities); p++) {
+      const VkQueueGlobalPriorityKHR candidate = ascending_priorities[p];
+
+      if (!anv_gem_has_context_priority(fd, candidate))
+         break;
+      device->max_context_priority = candidate;
+   }
+
+   if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES, &val))
+      device->has_exec_timeline = val;
+
+   return VK_SUCCESS;
+}
+
+/* Create the kernel context used by @device and configure it.
+ *
+ * With engine info available, the context is created with one engine per
+ * requested queue; otherwise a plain context is created and exactly one
+ * queue is expected. The context is then flagged non-recoverable and given
+ * the global priority requested on the first queue create info (MEDIUM when
+ * none is requested).
+ *
+ * Returns VK_ERROR_INITIALIZATION_FAILED when context creation fails and
+ * VK_ERROR_NOT_PERMITTED_KHR when a priority above MEDIUM is refused (the
+ * context is destroyed in that case).
+ */
+VkResult
+anv_i915_device_setup_context(struct anv_device *device,
+                              const VkDeviceCreateInfo *pCreateInfo,
+                              const uint32_t num_queues)
+{
+   struct anv_physical_device *pdevice = device->physical;
+
+   if (device->physical->engine_info) {
+      /* The kernel API supports at most 64 engines */
+      assert(num_queues <= 64);
+      enum intel_engine_class engine_classes[64];
+      int engine_count = 0;
+
+      /* One engine entry per queue, in create-info order. */
+      for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
+         const VkDeviceQueueCreateInfo *info =
+            &pCreateInfo->pQueueCreateInfos[i];
+
+         assert(info->queueFamilyIndex < pdevice->queue.family_count);
+         struct anv_queue_family *family =
+            &pdevice->queue.families[info->queueFamilyIndex];
+
+         for (uint32_t q = 0; q < info->queueCount; q++) {
+            engine_classes[engine_count] = family->engine_class;
+            engine_count++;
+         }
+      }
+
+      if (!intel_gem_create_context_engines(device->fd,
+                                            pdevice->engine_info,
+                                            engine_count, engine_classes,
+                                            (uint32_t *)&device->context_id)) {
+         return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
+                          "kernel context creation failed");
+      }
+   } else {
+      assert(num_queues == 1);
+      if (!intel_gem_create_context(device->fd, &device->context_id))
+         return vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
+   }
+
+   /* Here we tell the kernel not to attempt to recover our context but
+    * immediately (on the next batchbuffer submission) report that the
+    * context is lost, and we will do the recovery ourselves. In the case
+    * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting
+    * the client clean up the pieces.
+    */
+   anv_gem_set_context_param(device->fd, device->context_id,
+                             I915_CONTEXT_PARAM_RECOVERABLE, false);
+
+   /* Check if client specified queue priority. */
+   const VkDeviceQueueGlobalPriorityCreateInfoKHR *priority_info =
+      vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
+                           DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
+
+   VkQueueGlobalPriorityKHR priority = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
+   if (priority_info)
+      priority = priority_info->globalPriority;
+
+   /* Priorities are only applied when the kernel accepts at least MEDIUM. */
+   if (pdevice->max_context_priority < VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
+      return VK_SUCCESS;
+
+   /* As per spec, the driver implementation may deny requests to acquire
+    * a priority above the default priority (MEDIUM) if the caller does not
+    * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR
+    * is returned.
+    */
+   const int err = anv_gem_set_context_param(device->fd, device->context_id,
+                                             I915_CONTEXT_PARAM_PRIORITY,
+                                             priority);
+   if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
+      const VkResult result = vk_error(device, VK_ERROR_NOT_PERMITTED_KHR);
+      intel_gem_destroy_context(device->fd, device->context_id);
+      return result;
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Read the reset statistics of @context.
+ *
+ * On success (ioctl returns 0) *active and *pending receive the
+ * batch_active and batch_pending counters. On failure the outputs are left
+ * untouched and the ioctl return value is passed through.
+ */
+static int
+anv_gem_context_get_reset_stats(int fd, int context,
+                                uint32_t *active, uint32_t *pending)
+{
+   struct drm_i915_reset_stats reset_stats = {
+      .ctx_id = context,
+   };
+
+   const int ret = intel_ioctl(fd, DRM_IOCTL_I915_GET_RESET_STATS,
+                               &reset_stats);
+   if (ret != 0)
+      return ret;
+
+   *active = reset_stats.batch_active;
+   *pending = reset_stats.batch_pending;
+
+   return ret;
+}
+
+/* Check the context's reset statistics and mark the device lost if the
+ * query fails or if any active or pending batch was hit by a GPU hang.
+ */
+VkResult
+anv_i915_device_check_status(struct vk_device *vk_device)
+{
+   struct anv_device *device = container_of(vk_device, struct anv_device, vk);
+   uint32_t active = 0;
+   uint32_t pending = 0;
+
+   const int ret = anv_gem_context_get_reset_stats(device->fd,
+                                                   device->context_id,
+                                                   &active, &pending);
+   if (ret == -1) {
+      /* We don't know the real error. */
+      return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m");
+   }
+
+   /* An active hang takes precedence over a pending one. */
+   if (active)
+      return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers");
+
+   if (pending)
+      return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight");
+
+   return VK_SUCCESS;
+}
diff --git a/lib/mesa/src/intel/vulkan/i915/anv_device.h b/lib/mesa/src/intel/vulkan/i915/anv_device.h
new file mode 100644
index 000000000..af42c2241
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/i915/anv_device.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include "vulkan/vulkan_core.h"
+#include "vk_device.h"
+
+struct anv_device;
+struct anv_physical_device;
+
+/* Query the i915 kernel parameters needed by @device; fails with
+ * VK_ERROR_INITIALIZATION_FAILED when a mandatory feature is missing.
+ */
+VkResult
+anv_i915_physical_device_get_parameters(struct anv_physical_device *device);
+
+/* Create and configure (recoverability, priority) the kernel context used
+ * by @device for @num_queues queues.
+ */
+VkResult
+anv_i915_device_setup_context(struct anv_device *device,
+ const VkDeviceCreateInfo *pCreateInfo,
+ const uint32_t num_queues);
+
+/* Mark the device lost when the context's reset statistics report a hang. */
+VkResult anv_i915_device_check_status(struct vk_device *vk_device);
diff --git a/lib/mesa/src/intel/vulkan/i915/anv_kmd_backend.c b/lib/mesa/src/intel/vulkan/i915/anv_kmd_backend.c
new file mode 100644
index 000000000..a3c26dede
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/i915/anv_kmd_backend.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright © 2023 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <sys/mman.h>
+
+#include "anv_private.h"
+
+#include "i915/anv_batch_chain.h"
+
+#include "drm-uapi/i915_drm.h"
+
+/* Create a GEM buffer object of at least @size bytes.
+ *
+ * Without memory class/instance support the legacy GEM_CREATE ioctl is used
+ * and the only accepted region is the physical device's system region.
+ * Otherwise GEM_CREATE_EXT is used with the given @regions as placement
+ * list.
+ *
+ * Returns the GEM handle and stores the size reported by the kernel in
+ * *actual_size; returns 0 when the ioctl fails.
+ */
+static uint32_t
+i915_gem_create(struct anv_device *device,
+                const struct intel_memory_class_instance **regions,
+                uint16_t num_regions, uint64_t size,
+                enum anv_bo_alloc_flags alloc_flags,
+                uint64_t *actual_size)
+{
+   if (unlikely(!device->info->mem.use_class_instance)) {
+      assert(num_regions == 1 &&
+             device->physical->sys.region == regions[0]);
+
+      struct drm_i915_gem_create legacy_create = {
+         .size = size,
+      };
+      if (intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &legacy_create))
+         return 0;
+
+      *actual_size = legacy_create.size;
+      return legacy_create.handle;
+   }
+
+   struct drm_i915_gem_memory_class_instance placements[2];
+   assert(num_regions <= ARRAY_SIZE(placements));
+
+   for (uint16_t r = 0; r < num_regions; r++) {
+      placements[r].memory_class = regions[r]->klass;
+      placements[r].memory_instance = regions[r]->instance;
+   }
+
+   /* Request CPU access only for BOs that are mapped or must be CPU visible,
+    * that are not excluded from local memory, and only when the device has
+    * a non-mappable VRAM portion.
+    */
+   const bool wants_cpu_access =
+      (alloc_flags & (ANV_BO_ALLOC_MAPPED |
+                      ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE)) &&
+      !(alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM) &&
+      device->physical->vram_non_mappable.size > 0;
+
+   uint32_t create_flags = 0;
+   if (wants_cpu_access)
+      create_flags |= I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS;
+
+   struct drm_i915_gem_create_ext_memory_regions regions_ext = {
+      .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
+      .num_regions = num_regions,
+      .regions = (uintptr_t)placements,
+   };
+   struct drm_i915_gem_create_ext ext_create = {
+      .size = size,
+      .extensions = (uintptr_t) &regions_ext,
+      .flags = create_flags,
+   };
+
+   if (intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &ext_create))
+      return 0;
+
+   *actual_size = ext_create.size;
+   return ext_create.handle;
+}
+
+/* Release GEM @handle on the device fd. The ioctl result is ignored. */
+static void
+i915_gem_close(struct anv_device *device, uint32_t handle)
+{
+   struct drm_gem_close close_args = {
+      .handle = handle,
+   };
+
+   intel_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close_args);
+}
+
+/* Map @bo through the MMAP_OFFSET ioctl.
+ *
+ * The ioctl yields a fake offset which is then mmap()ed on the device fd
+ * for @size bytes, read/write and shared. Returns MAP_FAILED when the ioctl
+ * fails, otherwise whatever mmap() returns.
+ */
+static void *
+i915_gem_mmap_offset(struct anv_device *device, struct anv_bo *bo,
+                     uint64_t size, uint32_t flags)
+{
+   struct drm_i915_gem_mmap_offset offset_args = {
+      .handle = bo->gem_handle,
+      .flags = flags,
+   };
+
+   const int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET,
+                               &offset_args);
+   if (ret)
+      return MAP_FAILED;
+
+   void *map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                    device->fd, offset_args.offset);
+   return map;
+}
+
+/* Map @size bytes of @bo starting at @offset through the legacy GEM_MMAP
+ * ioctl. Returns MAP_FAILED when the ioctl fails, otherwise the address
+ * reported by the kernel.
+ */
+static void *
+i915_gem_mmap_legacy(struct anv_device *device, struct anv_bo *bo, uint64_t offset,
+                     uint64_t size, uint32_t flags)
+{
+   struct drm_i915_gem_mmap mmap_args = {
+      .handle = bo->gem_handle,
+      .offset = offset,
+      .size = size,
+      .flags = flags,
+   };
+
+   const int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP,
+                               &mmap_args);
+   if (ret)
+      return MAP_FAILED;
+
+   return (void *)(uintptr_t) mmap_args.addr_ptr;
+}
+
+/* Pick the mmap flags for mapping @bo.
+ *
+ * Devices with local memory always use I915_MMAP_OFFSET_FIXED. Otherwise a
+ * write-combined mapping is selected when any of these holds: the device
+ * has no LLC and the memory is host-coherent, the BO is flagged map_wc, or
+ * the memory is not host-cached. The result is expressed with the
+ * MMAP_OFFSET constants when mmap-offset is available and with the legacy
+ * I915_MMAP_WC flag (or 0) otherwise.
+ */
+static uint32_t
+mmap_calc_flags(struct anv_device *device, struct anv_bo *bo,
+                VkMemoryPropertyFlags property_flags)
+{
+   if (device->info->has_local_mem)
+      return I915_MMAP_OFFSET_FIXED;
+
+   const bool coherent_without_llc =
+      !device->info->has_llc &&
+      (property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+   const bool not_host_cached =
+      !(property_flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
+   const bool write_combined =
+      coherent_without_llc || bo->map_wc || not_host_cached;
+
+   if (likely(device->physical->info.has_mmap_offset))
+      return write_combined ? I915_MMAP_OFFSET_WC : I915_MMAP_OFFSET_WB;
+
+   return write_combined ? I915_MMAP_WC : 0;
+}
+
+/* Map @bo into the CPU address space.
+ *
+ * Uses the mmap-offset path when the device supports it (@offset is not
+ * used on that path) and the legacy mmap ioctl otherwise.
+ */
+static void *
+i915_gem_mmap(struct anv_device *device, struct anv_bo *bo, uint64_t offset,
+              uint64_t size, VkMemoryPropertyFlags property_flags)
+{
+   const uint32_t map_flags = mmap_calc_flags(device, bo, property_flags);
+
+   if (unlikely(!device->physical->info.has_mmap_offset))
+      return i915_gem_mmap_legacy(device, bo, offset, size, map_flags);
+
+   return i915_gem_mmap_offset(device, bo, size, map_flags);
+}
+
+/* VM bind hook of the i915 backend: does nothing and reports success. */
+static int
+i915_gem_vm_bind(struct anv_device *device, struct anv_bo *bo)
+{
+ return 0;
+}
+
+/* VM unbind hook of the i915 backend: does nothing and reports success. */
+static int
+i915_gem_vm_unbind(struct anv_device *device, struct anv_bo *bo)
+{
+ return 0;
+}
+
+/* Return the table of i915 implementations of the KMD backend hooks.
+ *
+ * The table is a function-local static constant, so the same pointer is
+ * returned on every call.
+ */
+const struct anv_kmd_backend *
+anv_i915_kmd_backend_get(void)
+{
+   static const struct anv_kmd_backend i915_backend = {
+      /* Buffer object management */
+      .gem_create = i915_gem_create,
+      .gem_close = i915_gem_close,
+      .gem_mmap = i915_gem_mmap,
+      .gem_vm_bind = i915_gem_vm_bind,
+      .gem_vm_unbind = i915_gem_vm_unbind,
+      /* Submission */
+      .execute_simple_batch = i915_execute_simple_batch,
+      .queue_exec_locked = i915_queue_exec_locked,
+      .queue_exec_trace = i915_queue_exec_trace,
+   };
+
+   return &i915_backend;
+}
diff --git a/lib/mesa/src/intel/vulkan/layers/anv_android_layer.c b/lib/mesa/src/intel/vulkan/layers/anv_android_layer.c
new file mode 100644
index 000000000..e36eb820a
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/layers/anv_android_layer.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright © 2023 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_private.h"
+
+/* Layer entry point wrapping anv_CreateImageView().
+ *
+ * Rejects ASTC formats on devices with verx10 >= 125 with
+ * VK_ERROR_OUT_OF_HOST_MEMORY; every other request is forwarded unchanged.
+ */
+VKAPI_ATTR VkResult VKAPI_CALL
+android_CreateImageView(VkDevice _device,
+                        const VkImageViewCreateInfo *pCreateInfo,
+                        const VkAllocationCallbacks *pAllocator,
+                        VkImageView *pView)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   const struct util_format_description *desc =
+      vk_format_description(pCreateInfo->format);
+
+   const bool is_astc = desc && desc->layout == UTIL_FORMAT_LAYOUT_ASTC;
+
+   /* Throw error in case application tries to create ASTC view on gfx125.
+    * This is done to avoid gpu hang that can result in using the unsupported
+    * format.
+    */
+   if (is_astc && device->info->verx10 >= 125) {
+      return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY,
+                       "ASTC format not supported (%s).", __func__);
+   }
+
+   return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView);
+}
diff --git a/lib/mesa/src/intel/vulkan/layers/anv_doom64.c b/lib/mesa/src/intel/vulkan/layers/anv_doom64.c
new file mode 100644
index 000000000..80ca74f97
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/layers/anv_doom64.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/set.h"
+#include "anv_private.h"
+#include "vk_common_entrypoints.h"
+
+/**
+ * The DOOM 64 rendering corruption is happening because the game always uses
+ * ```
+ * vkCmdPipelineBarrier(VK_IMAGE_LAYOUT_UNDEFINED ->
+ * VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
+ * vkCmdCopyBufferToImage(...)
+ * vkCmdPipelineBarrier(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL ->
+ * VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)
+ * ```
+ * when it wants to update its texture atlas image.
+ *
+ * According to spec, transitioning from VK_IMAGE_LAYOUT_UNDEFINED means
+ * that the current image content might be discarded, but the game relies
+ * on it being fully preserved.
+ *
+ * This work-around layer implements super-barebone layout tracking: allows
+ * the first transition from VK_IMAGE_LAYOUT_UNDEFINED, but replaces
+ * oldLayout with VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL for each
+ * subsequent transition of that image.
+ *
+ * Gen12+ does not ambiguate CCS data on transition from VK_IMAGE_LAYOUT_UNDEFINED
+ * so it preserves all compressed information, and this WA is not needed.
+ */
+
+/**
+ * DOOM 64 layer override of vkCmdPipelineBarrier.
+ *
+ * Only a barrier carrying exactly one image barrier that goes from
+ * VK_IMAGE_LAYOUT_UNDEFINED to VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL is
+ * inspected. The first time such a barrier is seen for an image, the image
+ * is recorded in the device's doom64_images set and the barrier is passed
+ * through untouched. On later occurrences a local copy of the barrier with
+ * oldLayout replaced by VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL is
+ * submitted instead. All other barriers are forwarded as-is.
+ */
+VKAPI_ATTR void VKAPI_CALL
+doom64_CmdPipelineBarrier(VkCommandBuffer commandBuffer,
+                          VkPipelineStageFlags srcStageMask,
+                          VkPipelineStageFlags dstStageMask,
+                          VkDependencyFlags dependencyFlags,
+                          uint32_t memoryBarrierCount,
+                          const VkMemoryBarrier* pMemoryBarriers,
+                          uint32_t bufferMemoryBarrierCount,
+                          const VkBufferMemoryBarrier* pBufferMemoryBarriers,
+                          uint32_t imageMemoryBarrierCount,
+                          const VkImageMemoryBarrier* pImageMemoryBarriers)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, command_buffer, commandBuffer);
+   assert(command_buffer && command_buffer->device);
+
+   struct set *tracked = command_buffer->device->workarounds.doom64_images;
+
+   /* Storage for the rewritten barrier; it must stay alive until the
+    * forwarding call at the end of the function.
+    */
+   VkImageMemoryBarrier patched;
+
+   const bool undefined_to_transfer_dst =
+      tracked &&
+      imageMemoryBarrierCount == 1 && pImageMemoryBarriers &&
+      pImageMemoryBarriers[0].oldLayout == VK_IMAGE_LAYOUT_UNDEFINED &&
+      pImageMemoryBarriers[0].newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+
+   if (undefined_to_transfer_dst) {
+      ANV_FROM_HANDLE(anv_image, image, pImageMemoryBarriers[0].image);
+
+      if (_mesa_set_search(tracked, image)) {
+         /* Already seen once: pretend the image comes from the shader
+          * read-only layout so its current content is preserved.
+          */
+         patched = pImageMemoryBarriers[0];
+         patched.oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+         pImageMemoryBarriers = &patched;
+      } else {
+         /* First transition from UNDEFINED is allowed through unchanged. */
+         _mesa_set_add(tracked, image);
+      }
+   }
+
+   vk_common_CmdPipelineBarrier(commandBuffer, srcStageMask, dstStageMask,
+                                dependencyFlags, memoryBarrierCount,
+                                pMemoryBarriers, bufferMemoryBarrierCount,
+                                pBufferMemoryBarriers,
+                                imageMemoryBarrierCount,
+                                pImageMemoryBarriers);
+}
+
+/**
+ * DOOM 64 layer override of vkCreateImage.
+ *
+ * Lazily creates the per-device pointer set used by
+ * doom64_CmdPipelineBarrier() to remember which images already went through
+ * their first UNDEFINED -> TRANSFER_DST transition, then forwards the call to
+ * anv_CreateImage().
+ *
+ * Returns VK_ERROR_OUT_OF_HOST_MEMORY (reported through vk_error(), like the
+ * other error paths of the driver) if the set cannot be allocated; otherwise
+ * returns whatever anv_CreateImage() returns.
+ */
+VKAPI_ATTR VkResult VKAPI_CALL
+doom64_CreateImage(VkDevice _device, const VkImageCreateInfo* pCreateInfo,
+                   const VkAllocationCallbacks* pAllocator, VkImage* pImage)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   assert(device);
+
+   if (!device->workarounds.doom64_images) {
+      struct set *images = _mesa_pointer_set_create(NULL);
+
+      if (!images)
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      /* Only publish the set on the device once it is known to be valid. */
+      device->workarounds.doom64_images = images;
+   }
+
+   return anv_CreateImage(_device, pCreateInfo, pAllocator, pImage);
+}
+
+/**
+ * DOOM 64 layer override of vkDestroyImage.
+ *
+ * Drops the image from the device's doom64_images set (if the set exists),
+ * destroys the set once it has no entries left, and then forwards the call
+ * to anv_DestroyImage().
+ */
+VKAPI_ATTR void VKAPI_CALL
+doom64_DestroyImage(VkDevice _device, VkImage _image,
+                    const VkAllocationCallbacks *pAllocator)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_image, image, _image);
+   assert(device);
+
+   struct set *tracked = device->workarounds.doom64_images;
+
+   if (tracked != NULL && image != NULL) {
+      _mesa_set_remove_key(tracked, image);
+
+      /* Release the set as soon as it is empty; doom64_CreateImage()
+       * recreates it when the pointer is NULL.
+       */
+      if (tracked->entries == 0) {
+         _mesa_set_destroy(tracked, NULL);
+         device->workarounds.doom64_images = NULL;
+      }
+   }
+
+   anv_DestroyImage(_device, _image, pAllocator);
+}
diff --git a/lib/mesa/src/intel/vulkan/meson.build b/lib/mesa/src/intel/vulkan/meson.build
index 9e54716df..be8a37e84 100644
--- a/lib/mesa/src/intel/vulkan/meson.build
+++ b/lib/mesa/src/intel/vulkan/meson.build
@@ -18,11 +18,13 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
+subdir('shaders')
+
inc_anv = include_directories('.')
anv_flags = [
no_override_init_args,
- c_sse2_args,
+ sse2_args,
]
anv_cpp_flags = []
@@ -38,24 +40,13 @@ anv_entrypoints = custom_target(
'--device-prefix', 'gfx11',
'--device-prefix', 'gfx12',
'--device-prefix', 'gfx125',
- '--device-prefix', 'hitman3'
+ '--device-prefix', 'doom64',
+ '--device-prefix', 'hitman3',
+ '--device-prefix', 'android'
],
depend_files : vk_entrypoints_gen_depend_files,
)
-float64_spv_h = custom_target(
- 'float64_spv.h',
- input : [glsl2spirv, float64_glsl_file],
- output : 'float64_spv.h',
- command : [
- prog_python, '@INPUT@', '@OUTPUT@',
- '--create-entry', 'main',
- '--vn', 'float64_spv_source',
- '--glsl-version', '450',
- '-Olib',
- ]
-)
-
idep_anv_headers = declare_dependency(
sources : [anv_entrypoints[0]],
include_directories : inc_anv,
@@ -87,23 +78,21 @@ intel_icd = custom_target(
install : true,
)
-if meson.version().version_compare('>= 0.58')
- _dev_icdname = 'intel_devenv_icd.@0@.json'.format(host_machine.cpu())
- custom_target(
- 'intel_devenv_icd',
- input : [vk_icd_gen, vk_api_xml],
- output : _dev_icdname,
- command : [
- prog_python, '@INPUT0@',
- '--api-version', '1.3', '--xml', '@INPUT1@',
- '--lib-path', meson.current_build_dir() / 'libvulkan_intel.so',
- '--out', '@OUTPUT@',
- ],
- build_by_default : true,
- )
+_dev_icdname = 'intel_devenv_icd.@0@.json'.format(host_machine.cpu())
+_dev_icd = custom_target(
+ 'intel_devenv_icd',
+ input : [vk_icd_gen, vk_api_xml],
+ output : _dev_icdname,
+ command : [
+ prog_python, '@INPUT0@',
+ '--api-version', '1.3', '--xml', '@INPUT1@',
+ '--lib-path', meson.current_build_dir() / 'libvulkan_intel.so',
+ '--out', '@OUTPUT@',
+ ],
+ build_by_default : true,
+)
- devenv.append('VK_ICD_FILENAMES', meson.current_build_dir() / _dev_icdname)
-endif
+devenv.append('VK_ICD_FILENAMES', _dev_icd.full_path())
libanv_per_hw_ver_libs = []
anv_per_hw_ver_files = files(
@@ -113,6 +102,7 @@ anv_per_hw_ver_files = files(
'genX_pipeline.c',
'genX_query.c',
'genX_state.c',
+ 'genX_video.c',
)
if with_intel_vk_rt
anv_per_hw_ver_files += files('genX_acceleration_structure.c',)
@@ -125,7 +115,7 @@ foreach g : [['90', ['gfx8_cmd_buffer.c']],
_gfx_ver = g[0]
libanv_per_hw_ver_libs += static_library(
'anv_per_hw_ver@0@'.format(_gfx_ver),
- [anv_per_hw_ver_files, g[1], anv_entrypoints[0]],
+ [anv_per_hw_ver_files, g[1], anv_entrypoints[0], generated_draws_spvs, ],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel,
],
@@ -141,7 +131,21 @@ foreach g : [['90', ['gfx8_cmd_buffer.c']],
endforeach
libanv_files = files(
+ 'i915/anv_batch_chain.c',
+ 'i915/anv_batch_chain.h',
+ 'i915/anv_device.c',
+ 'i915/anv_device.h',
+ 'i915/anv_kmd_backend.c',
+ 'layers/anv_doom64.c',
'layers/anv_hitman3.c',
+ 'layers/anv_android_layer.c',
+ 'xe/anv_batch_chain.c',
+ 'xe/anv_batch_chain.h',
+ 'xe/anv_kmd_backend.c',
+ 'xe/anv_device.c',
+ 'xe/anv_device.h',
+ 'xe/anv_queue.c',
+ 'xe/anv_queue.h',
'anv_allocator.c',
'anv_android.h',
'anv_batch_chain.c',
@@ -151,17 +155,19 @@ libanv_files = files(
'anv_descriptor_set.c',
'anv_device.c',
'anv_formats.c',
+ 'anv_generated_indirect_draws.c',
'anv_genX.h',
'anv_image.c',
+ 'anv_kmd_backend.c',
+ 'anv_kmd_backend.h',
'anv_measure.c',
'anv_measure.h',
+ 'anv_mesh_perprim_wa.c',
'anv_nir.h',
- 'anv_nir_add_base_work_group_id.c',
'anv_nir_apply_pipeline_layout.c',
'anv_nir_compute_push_layout.c',
'anv_nir_lower_multiview.c',
'anv_nir_lower_ubo_loads.c',
- 'anv_nir_lower_ycbcr_textures.c',
'anv_nir_push_descriptor_analysis.c',
'anv_perf.c',
'anv_pipeline.c',
@@ -170,6 +176,7 @@ libanv_files = files(
'anv_queue.c',
'anv_util.c',
'anv_utrace.c',
+ 'anv_video.c',
'anv_wsi.c',
)
@@ -208,6 +215,7 @@ libanv_common = static_library(
[
libanv_files, anv_entrypoints, sha1_h,
gen_xml_pack, float64_spv_h,
+ generated_draws_spvs,
],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
@@ -216,7 +224,7 @@ libanv_common = static_library(
c_args : anv_flags,
cpp_args : anv_cpp_flags,
gnu_symbol_visibility : 'hidden',
- dependencies : anv_deps,
+ dependencies : anv_deps
)
libvulkan_intel = shared_library(
@@ -227,17 +235,18 @@ libvulkan_intel = shared_library(
],
link_whole : [libanv_common, libanv_per_hw_ver_libs] + optional_libgrl,
link_with : [
- libintel_compiler, libintel_dev, libisl, libblorp, libintel_perf,
+ libintel_compiler, libisl, libblorp, libintel_perf,
],
dependencies : [
dep_thread, dep_dl, dep_m, anv_deps, idep_libintel_common,
idep_nir, idep_genxml, idep_vulkan_util, idep_vulkan_wsi,
idep_vulkan_runtime, idep_mesautil, idep_xmlconfig,
- idep_intel_driver_ds,
+ idep_intel_driver_ds, idep_intel_dev,
],
c_args : anv_flags,
gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_build_id, ld_args_bsymbolic, ld_args_gc_sections],
+ link_args : [vulkan_icd_link_args, ld_args_build_id, ld_args_bsymbolic, ld_args_gc_sections],
+ link_depends : vulkan_icd_link_depends,
install : true,
)
@@ -263,13 +272,13 @@ if with_tests
],
link_whole : libanv_common,
link_with : [
- libanv_per_hw_ver_libs, libintel_compiler, libintel_common, libintel_dev,
+ libanv_per_hw_ver_libs, libintel_compiler, libintel_common,
libisl, libblorp, libintel_perf,
] + optional_libgrl,
dependencies : [
dep_thread, dep_dl, dep_m, anv_deps,
idep_nir, idep_vulkan_util, idep_vulkan_wsi, idep_vulkan_runtime,
- idep_mesautil,
+ idep_mesautil, idep_intel_dev,
],
c_args : anv_flags,
gnu_symbol_visibility : 'hidden',
@@ -283,12 +292,12 @@ if with_tests
executable(
t,
['tests/@0@.c'.format(t), anv_entrypoints[0]],
- c_args : [ c_sse2_args ],
+ c_args : [ sse2_args ],
link_with : libvulkan_intel_test,
dependencies : [
dep_libdrm, dep_thread, dep_m, dep_valgrind,
idep_vulkan_util, idep_vulkan_wsi_headers,
- idep_vulkan_runtime, idep_intel_driver_ds,
+ idep_vulkan_runtime, idep_intel_driver_ds, idep_intel_dev,
],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
diff --git a/lib/mesa/src/intel/vulkan/shaders/common_generated_draws.glsl b/lib/mesa/src/intel/vulkan/shaders/common_generated_draws.glsl
new file mode 100644
index 000000000..06ea7781c
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/shaders/common_generated_draws.glsl
@@ -0,0 +1,133 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* Mask with only bit `i` set. The argument is parenthesized so that an
+ * expression argument (for example BITFIELD_BIT(a | b)) is shifted as a whole
+ * instead of being split by operator precedence.
+ */
+#define BITFIELD_BIT(i) (1u << (i))
+
+/* Bits tested in the `flags` value by the generated-draws shaders that
+ * include this file.
+ */
+/* Indirect data is read as VkDrawIndexedIndirectCommand. */
+#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0)
+/* Passed as is_predicated to the 3DPRIMITIVE writers. */
+#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1)
+/* gfx9 shader: store the draw id and emit a vertex buffer pointing at it. */
+#define ANV_GENERATED_FLAG_DRAWID BITFIELD_BIT(2)
+/* gfx9 shader: emit a vertex buffer pointing into the indirect data. */
+#define ANV_GENERATED_FLAG_BASE BITFIELD_BIT(3)
+
+/* These 3 bindings will be accessed through A64 messages */
+
+/* Source of the indirect draw parameters; the shaders read it one dword at a
+ * time as VkDrawIndirectCommand / VkDrawIndexedIndirectCommand fields.
+ */
+layout(set = 0, binding = 0, std430) buffer Storage0 {
+ uint indirect_data[];
+};
+
+/* Destination of the generated command dwords (all write_* helpers below
+ * store into this array).
+ */
+layout(set = 0, binding = 1, std430) buffer Storage1 {
+ uint commands[];
+};
+
+/* Per-draw id storage; written by the gfx9 shader when the DRAWID flag is
+ * set.
+ */
+layout(set = 0, binding = 2, std430) buffer Storage2 {
+ uint draw_ids[];
+};
+
+/* This data will be provided through push constants. */
+layout(set = 0, binding = 3) uniform block {
+ /* Base address from which the gfx9 shader derives the draw-id vertex
+  * buffer address (draw_id_addr + 4 * item index).
+  */
+ uint64_t draw_id_addr;
+ /* Base address of the indirect data; the gfx9 shader adds
+  * item index * indirect_data_stride plus a field offset to it.
+  */
+ uint64_t indirect_data_addr;
+ /* Distance between two indirect commands, in bytes (the shaders divide
+  * by 4 to index indirect_data[]).
+  */
+ uint indirect_data_stride;
+ /* Bits 0-3: ANV_GENERATED_FLAG_*; bits 8-15: MOCS (read by the gfx9
+  * shader); bits 16-23: number of command dwords reserved per item.
+  */
+ uint flags;
+ /* Added to the item index to form the draw id. */
+ uint draw_base;
+ /* Items whose draw id is below this value get a draw written. */
+ uint draw_count;
+ /* A jump to end_addr is only written when draw_count is below this. */
+ uint max_draw_count;
+ /* Multiplied into the instance count read from the indirect data. */
+ uint instance_multiplier;
+ /* Address passed to write_MI_BATCH_BUFFER_START() by the shaders. */
+ uint64_t end_addr;
+};
+
+/* Writes a 4-dword VERTEX_BUFFER_STATE element into commands[] starting at
+ * write_offset: control dword, 64-bit address (low dword first), then size.
+ */
+void write_VERTEX_BUFFER_STATE(uint write_offset,
+                               uint mocs,
+                               uint buffer_idx,
+                               uint64_t address,
+                               uint size)
+{
+   /* Buffer Pitch (bit 0) and Null Vertex Buffer (bit 13) are left at 0. */
+   uint control = (1u << 14) |         /* Address Modify Enable */
+                  (mocs << 16) |       /* MOCS */
+                  (buffer_idx << 26);  /* Vertex Buffer Index */
+   uint address_lo = uint(address & 0xffffffff);
+   uint address_hi = uint(address >> 32);
+
+   commands[write_offset + 0] = control;
+   commands[write_offset + 1] = address_lo;
+   commands[write_offset + 2] = address_hi;
+   commands[write_offset + 3] = size;
+}
+
+/* Writes a 7-dword 3DPRIMITIVE command into commands[] starting at
+ * write_offset. Bit 8 of the header carries is_predicated and bit 8 of the
+ * second dword carries is_indexed; the remaining dwords are the draw
+ * parameters in the order of the arguments.
+ */
+void write_3DPRIMITIVE(uint write_offset,
+                       bool is_predicated,
+                       bool is_indexed,
+                       uint vertex_count_per_instance,
+                       uint start_vertex_location,
+                       uint instance_count,
+                       uint start_instance_location,
+                       uint base_vertex_location)
+{
+   uint header = (3u << 29) |  /* Command Type */
+                 (3u << 27) |  /* Command SubType */
+                 (3u << 24) |  /* 3D Command Opcode */
+                 (5u << 0);    /* DWord Length */
+   if (is_predicated)
+      header |= 1u << 8;
+
+   uint dw1 = is_indexed ? (1u << 8) : 0u;
+
+   commands[write_offset + 0] = header;
+   commands[write_offset + 1] = dw1;
+   commands[write_offset + 2] = vertex_count_per_instance;
+   commands[write_offset + 3] = start_vertex_location;
+   commands[write_offset + 4] = instance_count;
+   commands[write_offset + 5] = start_instance_location;
+   commands[write_offset + 6] = base_vertex_location;
+}
+
+/* Writes a 10-dword 3DPRIMITIVE command with Extended Parameter Enable set
+ * into commands[] starting at write_offset. The first seven dwords match
+ * write_3DPRIMITIVE(); the last three carry param_base_vertex,
+ * param_base_instance and param_draw_id.
+ */
+void write_3DPRIMITIVE_EXTENDED(uint write_offset,
+                                bool is_predicated,
+                                bool is_indexed,
+                                uint vertex_count_per_instance,
+                                uint start_vertex_location,
+                                uint instance_count,
+                                uint start_instance_location,
+                                uint base_vertex_location,
+                                uint param_base_vertex,
+                                uint param_base_instance,
+                                uint param_draw_id)
+{
+   uint header = (3u << 29) |  /* Command Type */
+                 (3u << 27) |  /* Command SubType */
+                 (3u << 24) |  /* 3D Command Opcode */
+                 (1u << 11) |  /* Extended Parameter Enable */
+                 (8u << 0);    /* DWord Length */
+   if (is_predicated)
+      header |= 1u << 8;
+
+   uint dw1 = is_indexed ? (1u << 8) : 0u;
+
+   commands[write_offset + 0] = header;
+   commands[write_offset + 1] = dw1;
+   commands[write_offset + 2] = vertex_count_per_instance;
+   commands[write_offset + 3] = start_vertex_location;
+   commands[write_offset + 4] = instance_count;
+   commands[write_offset + 5] = start_instance_location;
+   commands[write_offset + 6] = base_vertex_location;
+   commands[write_offset + 7] = param_base_vertex;
+   commands[write_offset + 8] = param_base_instance;
+   commands[write_offset + 9] = param_draw_id;
+}
+
+/* Writes a 3-dword MI_BATCH_BUFFER_START command into commands[] starting at
+ * write_offset: header followed by the 64-bit target address, low dword
+ * first.
+ */
+void write_MI_BATCH_BUFFER_START(uint write_offset,
+                                 uint64_t addr)
+{
+   /* Command Type (bits 31:29) is 0 for this command. */
+   uint header = (49u << 23) |  /* MI Command Opcode */
+                 (1u << 8) |    /* Address Space Indicator (PPGTT) */
+                 (1u << 0);     /* DWord Length */
+   uint addr_lo = uint(addr & 0xffffffff);
+   uint addr_hi = uint(addr >> 32);
+
+   commands[write_offset + 0] = header;
+   commands[write_offset + 1] = addr_lo;
+   commands[write_offset + 2] = addr_hi;
+}
diff --git a/lib/mesa/src/intel/vulkan/shaders/gfx11_generated_draws.glsl b/lib/mesa/src/intel/vulkan/shaders/gfx11_generated_draws.glsl
new file mode 100644
index 000000000..8745f7bab
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/shaders/gfx11_generated_draws.glsl
@@ -0,0 +1,85 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#version 450
+#extension GL_ARB_gpu_shader_int64 : enable
+#extension GL_GOOGLE_include_directive : enable
+
+#include "common_generated_draws.glsl"
+
+/* One fragment shader invocation handles one item: it reads one indirect
+ * draw command and writes the matching extended 3DPRIMITIVE, or, for the
+ * item right after the last draw, a jump to end_addr.
+ */
+void main()
+{
+   /* Linear item index derived from the fragment position (8192 items per
+    * row).
+    */
+   uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
+   uint draw_id = draw_base + item_idx;
+
+   /* Bits 16-23 of flags give the number of dwords reserved per item. */
+   uint _3dprim_dw_size = (flags >> 16) & 0xff;
+   uint cmd_idx = item_idx * _3dprim_dw_size;
+
+   if (draw_id >= draw_count) {
+      /* Only write a jump forward in the batch if we have fewer elements
+       * than the max draw count.
+       */
+      if (draw_id == draw_count && draw_id < max_draw_count)
+         write_MI_BATCH_BUFFER_START(cmd_idx, end_addr);
+      return;
+   }
+
+   bool is_indexed = (flags & ANV_GENERATED_FLAG_INDEXED) != 0;
+   bool is_predicated = (flags & ANV_GENERATED_FLAG_PREDICATED) != 0;
+   uint data_idx = item_idx * indirect_data_stride / 4;
+
+   /* The first three dwords have the same role in both
+    * VkDrawIndirectCommand and VkDrawIndexedIndirectCommand.
+    */
+   uint count = indirect_data[data_idx + 0];
+   uint instance_count = indirect_data[data_idx + 1] * instance_multiplier;
+   uint start = indirect_data[data_idx + 2];
+
+   uint base_vertex_location;
+   uint first_instance;
+   uint param_base_vertex;
+
+   if (is_indexed) {
+      /* VkDrawIndexedIndirectCommand: vertexOffset, firstInstance */
+      base_vertex_location = indirect_data[data_idx + 3];
+      first_instance = indirect_data[data_idx + 4];
+      param_base_vertex = base_vertex_location;
+   } else {
+      /* VkDrawIndirectCommand: firstInstance */
+      base_vertex_location = 0u;
+      first_instance = indirect_data[data_idx + 3];
+      param_base_vertex = start;
+   }
+
+   write_3DPRIMITIVE_EXTENDED(cmd_idx,
+                              is_predicated,
+                              is_indexed,
+                              count,
+                              start,
+                              instance_count,
+                              first_instance,
+                              base_vertex_location,
+                              param_base_vertex,
+                              first_instance,
+                              draw_id);
+}
diff --git a/lib/mesa/src/intel/vulkan/shaders/gfx9_generated_draws.glsl b/lib/mesa/src/intel/vulkan/shaders/gfx9_generated_draws.glsl
new file mode 100644
index 000000000..9850b19c3
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/shaders/gfx9_generated_draws.glsl
@@ -0,0 +1,144 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#version 450
+#extension GL_ARB_gpu_shader_int64 : enable
+#extension GL_GOOGLE_include_directive : enable
+
+#include "common_generated_draws.glsl"
+
+/* Emits the optional vertex-buffer setup that precedes a draw: a command
+ * header (presumably 3DSTATE_VERTEX_BUFFERS) followed by one
+ * VERTEX_BUFFER_STATE per requested parameter buffer.
+ *
+ * base_params_offset is the byte offset, inside one indirect command, of the
+ * 8 bytes exposed through vertex buffer 31.
+ *
+ * Returns the index of the first dword after what was written (cmd_idx
+ * itself when neither buffer is requested).
+ */
+uint write_draw_param_vertex_buffers(uint cmd_idx,
+                                     uint item_idx,
+                                     uint draw_id,
+                                     uint mocs,
+                                     bool uses_base,
+                                     bool uses_drawid,
+                                     uint base_params_offset)
+{
+   if (!uses_base && !uses_drawid)
+      return cmd_idx;
+
+   uint state_vertex_len =
+      1 + (uses_base ? 4 : 0) + (uses_drawid ? 4 : 0);
+   commands[cmd_idx] =
+      (3 << 29 |                     /* Command Type */
+       3 << 27 |                     /* Command SubType */
+       0 << 24 |                     /* 3D Command Opcode */
+       8 << 16 |                     /* 3D Command Sub Opcode */
+       (state_vertex_len - 2) << 0); /* DWord Length */
+   cmd_idx += 1;
+
+   if (uses_base) {
+      uint64_t indirect_draw_data_addr =
+         indirect_data_addr + item_idx * indirect_data_stride +
+         base_params_offset;
+      write_VERTEX_BUFFER_STATE(cmd_idx,
+                                mocs,
+                                31,
+                                indirect_draw_data_addr,
+                                8);
+      cmd_idx += 4;
+   }
+
+   if (uses_drawid) {
+      uint64_t draw_idx_addr = draw_id_addr + 4 * item_idx;
+      draw_ids[draw_id] = draw_id;
+      write_VERTEX_BUFFER_STATE(cmd_idx,
+                                mocs,
+                                32,
+                                draw_idx_addr,
+                                4);
+      cmd_idx += 4;
+   }
+
+   return cmd_idx;
+}
+
+/* One fragment shader invocation handles one item: it reads one indirect
+ * draw command and writes the optional vertex-buffer setup plus a
+ * 3DPRIMITIVE, or, for the item right after the last draw, a jump to
+ * end_addr.
+ */
+void main()
+{
+   /* Linear item index derived from the fragment position (8192 items per
+    * row).
+    */
+   uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
+   uint draw_id = draw_base + item_idx;
+
+   /* Bits 16-23 of flags give the number of dwords reserved per item. */
+   uint _3dprim_dw_size = (flags >> 16) & 0xff;
+   uint cmd_idx = item_idx * _3dprim_dw_size;
+
+   if (draw_id >= draw_count) {
+      /* Only write a jump forward in the batch if we have fewer elements
+       * than the max draw count.
+       */
+      if (draw_id == draw_count && draw_id < max_draw_count)
+         write_MI_BATCH_BUFFER_START(cmd_idx, end_addr);
+      return;
+   }
+
+   bool is_indexed = (flags & ANV_GENERATED_FLAG_INDEXED) != 0;
+   bool is_predicated = (flags & ANV_GENERATED_FLAG_PREDICATED) != 0;
+   bool uses_base = (flags & ANV_GENERATED_FLAG_BASE) != 0;
+   bool uses_drawid = (flags & ANV_GENERATED_FLAG_DRAWID) != 0;
+   uint mocs = (flags >> 8) & 0xff;
+   uint data_idx = item_idx * indirect_data_stride / 4;
+
+   /* The first three dwords have the same role in both
+    * VkDrawIndirectCommand and VkDrawIndexedIndirectCommand.
+    */
+   uint count = indirect_data[data_idx + 0];
+   uint instance_count = indirect_data[data_idx + 1] * instance_multiplier;
+   uint start = indirect_data[data_idx + 2];
+
+   uint base_vertex_location;
+   uint first_instance;
+   uint base_params_offset;
+
+   if (is_indexed) {
+      /* VkDrawIndexedIndirectCommand: vertexOffset, firstInstance */
+      base_vertex_location = indirect_data[data_idx + 3];
+      first_instance = indirect_data[data_idx + 4];
+      base_params_offset = 12u;
+   } else {
+      /* VkDrawIndirectCommand: firstInstance */
+      base_vertex_location = 0u;
+      first_instance = indirect_data[data_idx + 3];
+      base_params_offset = 8u;
+   }
+
+   cmd_idx = write_draw_param_vertex_buffers(cmd_idx,
+                                             item_idx,
+                                             draw_id,
+                                             mocs,
+                                             uses_base,
+                                             uses_drawid,
+                                             base_params_offset);
+
+   write_3DPRIMITIVE(cmd_idx,
+                     is_predicated,
+                     is_indexed,
+                     count,
+                     start,
+                     instance_count,
+                     first_instance,
+                     base_vertex_location);
+}
diff --git a/lib/mesa/src/intel/vulkan/shaders/meson.build b/lib/mesa/src/intel/vulkan/shaders/meson.build
new file mode 100644
index 000000000..2f1952ee5
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/shaders/meson.build
@@ -0,0 +1,56 @@
+# Copyright © 2022 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Header generated from float64_glsl_file by running the glsl2spirv script
+# through prog_python; the produced array is named 'float64_spv_source'.
+float64_spv_h = custom_target(
+ 'float64_spv.h',
+ input : [glsl2spirv, float64_glsl_file],
+ output : 'float64_spv.h',
+ command : [
+ prog_python, '@INPUT@', '@OUTPUT@',
+ prog_glslang,
+ '--create-entry', 'main',
+ '--vn', 'float64_spv_source',
+ '--glsl-version', '450',
+ '-Olib',
+ ]
+)
+
+# Shaders that generate indirect draw commands; each one gets its own
+# generated header below.
+generated_draws_shaders = [
+ 'gfx9_generated_draws.glsl',
+ 'gfx11_generated_draws.glsl',
+]
+
+# Collects one custom_target per shader above, for use as a source by the
+# libraries that include the generated headers.
+generated_draws_spvs = []
+foreach f : generated_draws_shaders
+ # foo.glsl -> foo_spv.h (output file) and foo_spv_source (variable name)
+ spv_filename = f.replace('.glsl', '_spv.h')
+ src_name = f.replace('.glsl', '_spv_source')
+ generated_draws_spvs += custom_target(
+ spv_filename,
+ # common_generated_draws.glsl is listed as an input but not passed on the
+ # command line (only @INPUT0@ and @INPUT1@ are) - presumably so that
+ # editing the shared include retriggers the build.
+ input : [glsl2spirv, f, files('common_generated_draws.glsl')],
+ output : spv_filename,
+ command : [
+ prog_python, '@INPUT0@', '@INPUT1@', '@OUTPUT@',
+ prog_glslang,
+ '--vn', src_name,
+ '--glsl-version', '450',
+ '--stage', 'frag',
+ # Include search path so the shaders' #include of the common file
+ # resolves from this directory.
+ '-I' + meson.current_source_dir(),
+ ])
+endforeach
diff --git a/lib/mesa/src/intel/vulkan/tests/block_pool_grow_first.c b/lib/mesa/src/intel/vulkan/tests/block_pool_grow_first.c
index 7359b66cb..5ad230392 100644
--- a/lib/mesa/src/intel/vulkan/tests/block_pool_grow_first.c
+++ b/lib/mesa/src/intel/vulkan/tests/block_pool_grow_first.c
@@ -36,7 +36,9 @@ int main(void)
const uint32_t block_size = 16 * 1024;
const uint32_t initial_size = block_size / 2;
+ test_device_info_init(&physical_device.info);
anv_device_set_physical(&device, &physical_device);
+ device.kmd_backend = anv_kmd_backend_get(INTEL_KMD_TYPE_STUB);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_block_pool_init(&pool, &device, "test", 4096, initial_size);
diff --git a/lib/mesa/src/intel/vulkan/tests/state_pool_padding.c b/lib/mesa/src/intel/vulkan/tests/state_pool_padding.c
index b76ba8ad6..845767a35 100644
--- a/lib/mesa/src/intel/vulkan/tests/state_pool_padding.c
+++ b/lib/mesa/src/intel/vulkan/tests/state_pool_padding.c
@@ -30,7 +30,9 @@ int main(void)
struct anv_device device = {};
struct anv_state_pool state_pool;
+ test_device_info_init(&physical_device.info);
anv_device_set_physical(&device, &physical_device);
+ device.kmd_backend = anv_kmd_backend_get(INTEL_KMD_TYPE_STUB);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 4096);
diff --git a/lib/mesa/src/intel/vulkan/tests/test_common.h b/lib/mesa/src/intel/vulkan/tests/test_common.h
index 3f883e3bd..ae84935f3 100644
--- a/lib/mesa/src/intel/vulkan/tests/test_common.h
+++ b/lib/mesa/src/intel/vulkan/tests/test_common.h
@@ -32,3 +32,8 @@
abort(); \
} \
} while (false)
+
+/* Fills in the intel_device_info fields the allocator tests rely on.
+ * Only mem_alignment is set (to 4096); every other field is left as the
+ * caller initialized it.
+ */
+static inline void test_device_info_init(struct intel_device_info *info)
+{
+ info->mem_alignment = 4096;
+}
diff --git a/lib/mesa/src/intel/vulkan/xe/anv_batch_chain.c b/lib/mesa/src/intel/vulkan/xe/anv_batch_chain.c
new file mode 100644
index 000000000..dbcf989d7
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/xe/anv_batch_chain.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright © 2023 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "xe/anv_batch_chain.h"
+
+#include "anv_private.h"
+
+#include <xf86drm.h>
+
+#include "drm-uapi/xe_drm.h"
+
+/**
+ * Submit one batch buffer on the queue's Xe engine and wait for it.
+ *
+ * A temporary DRM sync object is created, attached to the exec as a signal
+ * sync, waited on with an INT64_MAX timeout and finally destroyed.
+ * batch_bo_size is not used by this implementation.
+ *
+ * Returns VK_SUCCESS, the vk_errorf(VK_ERROR_UNKNOWN) result when the sync
+ * object cannot be created, or the vk_device_set_lost() result when either
+ * the exec or the wait ioctl fails.
+ */
+VkResult
+xe_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
+                        uint32_t batch_bo_size)
+{
+   struct anv_device *device = queue->device;
+   uint32_t done_syncobj;
+
+   if (drmSyncobjCreate(device->fd, 0, &done_syncobj))
+      return vk_errorf(device, VK_ERROR_UNKNOWN, "Unable to create sync obj");
+
+   struct drm_xe_sync signal_sync = {
+      .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL,
+      .handle = done_syncobj,
+   };
+   struct drm_xe_exec exec = {
+      .engine_id = queue->engine_id,
+      .num_batch_buffer = 1,
+      .address = batch_bo->offset,
+      .num_syncs = 1,
+      .syncs = (uintptr_t)&signal_sync,
+   };
+   VkResult result = VK_SUCCESS;
+
+   if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec)) {
+      result = vk_device_set_lost(&device->vk, "XE_EXEC failed: %m");
+   } else {
+      /* The exec was accepted: block until it signals the sync object. */
+      struct drm_syncobj_wait wait = {
+         .handles = (uintptr_t)&done_syncobj,
+         .timeout_nsec = INT64_MAX,
+         .count_handles = 1,
+      };
+
+      if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait))
+         result = vk_device_set_lost(&device->vk, "DRM_IOCTL_SYNCOBJ_WAIT failed: %m");
+   }
+
+   /* The sync object is released on every path once it has been created. */
+   drmSyncobjDestroy(device->fd, done_syncobj);
+
+   return result;
+}
+
+/* Readable values for the "signal" argument of xe_exec_fill_sync(). */
+#define TYPE_SIGNAL true
+#define TYPE_WAIT false
+
+/**
+ * Fill one drm_xe_sync entry from a vk_sync.
+ *
+ * Only DRM syncobj backed vk_sync objects are handled. A non-zero value
+ * selects the timeline syncobj flavour and is stored as the timeline point,
+ * zero selects a plain syncobj. When signal is true the entry is also marked
+ * as a signal operation. Flags are OR'ed into xe_sync->flags, so the entry
+ * is expected to start out zeroed.
+ */
+static void
+xe_exec_fill_sync(struct drm_xe_sync *xe_sync, struct vk_sync *vk_sync,
+                  uint64_t value, bool signal)
+{
+   if (unlikely(!vk_sync_type_is_drm_syncobj(vk_sync->type))) {
+      unreachable("Unsupported sync type");
+      return;
+   }
+
+   uint32_t sync_flags = value ? DRM_XE_SYNC_TIMELINE_SYNCOBJ
+                               : DRM_XE_SYNC_SYNCOBJ;
+   if (signal)
+      sync_flags |= DRM_XE_SYNC_SIGNAL;
+
+   xe_sync->handle = vk_sync_as_drm_syncobj(vk_sync)->syncobj;
+   if (value)
+      xe_sync->timeline_value = value;
+   xe_sync->flags |= sync_flags;
+}
+
+/**
+ * Build the drm_xe_sync array for a queue submission.
+ *
+ * The array holds, in order: the utrace signal (only when utrace_submit has
+ * no batch of its own), every wait, every signal and finally the queue's own
+ * sync when queue->sync is set.
+ *
+ * On success *ret / *ret_count describe the array; it is allocated from
+ * device->vk.alloc and must be released by the caller with vk_free(). When
+ * there is nothing to synchronize on, *ret is NULL and *ret_count is 0.
+ *
+ * Returns VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY.
+ */
+static VkResult
+xe_exec_process_syncs(struct anv_queue *queue,
+                      uint32_t wait_count, const struct vk_sync_wait *waits,
+                      uint32_t signal_count, const struct vk_sync_signal *signals,
+                      struct anv_utrace_submit *utrace_submit,
+                      struct drm_xe_sync **ret, uint32_t *ret_count)
+{
+   struct anv_device *device = queue->device;
+   /* Signal the utrace sync only if it doesn't have a batch. Otherwise it's
+    * the utrace batch that should signal its own sync.
+    *
+    * The same condition must drive both the slot count and the fill below,
+    * otherwise a zeroed, never filled entry would be handed to the kernel.
+    */
+   const bool has_utrace_sync = utrace_submit && !utrace_submit->batch_bo;
+   const uint32_t num_syncs = wait_count + signal_count +
+                              (has_utrace_sync ? 1 : 0) +
+                              (queue->sync ? 1 : 0);
+
+   *ret = NULL;
+   *ret_count = 0;
+
+   if (!num_syncs)
+      return VK_SUCCESS;
+
+   struct drm_xe_sync *xe_syncs = vk_zalloc(&device->vk.alloc,
+                                            sizeof(*xe_syncs) * num_syncs, 8,
+                                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!xe_syncs)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   uint32_t count = 0;
+
+   if (has_utrace_sync) {
+      struct drm_xe_sync *xe_sync = &xe_syncs[count++];
+
+      xe_exec_fill_sync(xe_sync, utrace_submit->sync, 0, TYPE_SIGNAL);
+   }
+
+   for (uint32_t i = 0; i < wait_count; i++) {
+      struct drm_xe_sync *xe_sync = &xe_syncs[count++];
+      const struct vk_sync_wait *vk_wait = &waits[i];
+
+      xe_exec_fill_sync(xe_sync, vk_wait->sync, vk_wait->wait_value,
+                        TYPE_WAIT);
+   }
+
+   for (uint32_t i = 0; i < signal_count; i++) {
+      struct drm_xe_sync *xe_sync = &xe_syncs[count++];
+      const struct vk_sync_signal *vk_signal = &signals[i];
+
+      xe_exec_fill_sync(xe_sync, vk_signal->sync, vk_signal->signal_value,
+                        TYPE_SIGNAL);
+   }
+
+   if (queue->sync) {
+      struct drm_xe_sync *xe_sync = &xe_syncs[count++];
+
+      xe_exec_fill_sync(xe_sync, queue->sync, 0,
+                        TYPE_SIGNAL);
+   }
+
+   assert(count == num_syncs);
+   *ret = xe_syncs;
+   *ret_count = num_syncs;
+   return VK_SUCCESS;
+}
+
+/**
+ * Debug output for a submission.
+ *
+ * With INTEL_DEBUG(DEBUG_SUBMIT) set, the batch address and the queue index
+ * are printed to stderr. The command buffers are then always passed on to
+ * anv_cmd_buffer_exec_batch_debug().
+ */
+static void
+xe_exec_print_debug(struct anv_queue *queue, uint32_t cmd_buffer_count,
+                    struct anv_cmd_buffer **cmd_buffers, struct anv_query_pool *perf_query_pool,
+                    uint32_t perf_query_pass, struct drm_xe_exec *exec)
+{
+   if (INTEL_DEBUG(DEBUG_SUBMIT)) {
+      const uint64_t batch_address = exec->address;
+
+      fprintf(stderr, "Batch offset=0x%016"PRIx64" on queue %u\n",
+              batch_address, queue->vk.index_in_family);
+   }
+
+   anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers,
+                                   perf_query_pool, perf_query_pass);
+}
+
+/**
+ * Submit the utrace batch of utrace_submit on the queue's engine.
+ *
+ * The exec carries a single sync entry that signals utrace_submit->sync.
+ * The ioctl is skipped when device->info->no_hw is set.
+ *
+ * Returns VK_SUCCESS, or the vk_device_set_lost() result if the exec ioctl
+ * fails.
+ */
+VkResult
+xe_queue_exec_utrace_locked(struct anv_queue *queue,
+                            struct anv_utrace_submit *utrace_submit)
+{
+   struct anv_device *device = queue->device;
+   struct drm_xe_sync signal_sync = {};
+
+   xe_exec_fill_sync(&signal_sync, utrace_submit->sync, 0, TYPE_SIGNAL);
+
+#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
+   /* Flush the CPU-written batch contents when the platform requires it. */
+   if (device->physical->memory.need_clflush)
+      intel_flush_range(utrace_submit->batch_bo->map,
+                        utrace_submit->batch_bo->size);
+#endif
+
+   struct drm_xe_exec exec = {
+      .engine_id = queue->engine_id,
+      .num_batch_buffer = 1,
+      .syncs = (uintptr_t)&signal_sync,
+      .num_syncs = 1,
+      .address = utrace_submit->batch_bo->offset,
+   };
+
+   if (likely(!device->info->no_hw) &&
+       intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec))
+      return vk_device_set_lost(&device->vk, "anv_xe_queue_exec_locked failed: %m");
+
+   return VK_SUCCESS;
+}
+
+/* Submit a set of command buffers (possibly none) on the queue's Xe engine.
+ *
+ * Steps, in order:
+ *  1. anv_device_utrace_flush_cmd_buffers() is called and may hand back a
+ *     utrace_submit object.
+ *  2. The wait/signal lists, the optional utrace sync and the optional
+ *     queue->sync are converted into a drm_xe_sync array.
+ *  3. The batch address is the first batch BO of the first command buffer,
+ *     or device->trivial_batch_bo when cmd_buffer_count is 0.
+ *  4. DRM_IOCTL_XE_EXEC is issued unless device->info->no_hw is set.
+ *  5. If queue->sync is set, it is waited on before returning.
+ *  6. If a utrace batch exists, it is submitted last through
+ *     xe_queue_exec_utrace_locked().
+ *
+ * perf_query_pool / perf_query_pass are only forwarded to the debug helper.
+ *
+ * Returns VK_SUCCESS or the first error hit along the way; a failing exec
+ * ioctl marks the device lost, a failing queue->sync wait marks the queue
+ * lost.
+ */
+VkResult
+xe_queue_exec_locked(struct anv_queue *queue,
+ uint32_t wait_count,
+ const struct vk_sync_wait *waits,
+ uint32_t cmd_buffer_count,
+ struct anv_cmd_buffer **cmd_buffers,
+ uint32_t signal_count,
+ const struct vk_sync_signal *signals,
+ struct anv_query_pool *perf_query_pool,
+ uint32_t perf_query_pass)
+{
+ struct anv_device *device = queue->device;
+ struct anv_utrace_submit *utrace_submit = NULL;
+ VkResult result;
+
+ result = anv_device_utrace_flush_cmd_buffers(queue, cmd_buffer_count,
+ cmd_buffers, &utrace_submit);
+ if (result != VK_SUCCESS)
+ return result;
+
+ /* xe_syncs stays NULL / count 0 when there is nothing to synchronize on */
+ struct drm_xe_sync *xe_syncs = NULL;
+ uint32_t xe_syncs_count = 0;
+ result = xe_exec_process_syncs(queue, wait_count, waits,
+ signal_count, signals,
+ utrace_submit,
+ &xe_syncs, &xe_syncs_count);
+ if (result != VK_SUCCESS)
+ return result;
+
+ /* If we have no batch for utrace, just forget about it now. */
+ if (utrace_submit && !utrace_submit->batch_bo)
+ utrace_submit = NULL;
+
+ struct drm_xe_exec exec = {
+ .engine_id = queue->engine_id,
+ .num_batch_buffer = 1,
+ .syncs = (uintptr_t)xe_syncs,
+ .num_syncs = xe_syncs_count,
+ };
+
+ if (cmd_buffer_count) {
+ /* Chain the command buffers, then start execution at the first batch
+ * BO of the first command buffer.
+ */
+ anv_cmd_buffer_chain_command_buffers(cmd_buffers, cmd_buffer_count);
+
+#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
+ if (device->physical->memory.need_clflush)
+ anv_cmd_buffer_clflush(cmd_buffers, cmd_buffer_count);
+#endif
+
+ struct anv_cmd_buffer *first_cmd_buffer = cmd_buffers[0];
+ struct anv_batch_bo *first_batch_bo = list_first_entry(&first_cmd_buffer->batch_bos,
+ struct anv_batch_bo, link);
+ exec.address = first_batch_bo->bo->offset;
+ } else {
+ /* Nothing to execute: submit the device's trivial batch so the syncs
+ * are still processed.
+ */
+ exec.address = device->trivial_batch_bo->offset;
+ }
+
+ xe_exec_print_debug(queue, cmd_buffer_count, cmd_buffers, perf_query_pool,
+ perf_query_pass, &exec);
+
+ /* TODO: add perfetto stuff when Xe supports it */
+
+ if (!device->info->no_hw) {
+ if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec))
+ result = vk_device_set_lost(&device->vk, "anv_xe_queue_exec_locked failed: %m");
+ }
+ /* The sync array is no longer needed once the ioctl has returned */
+ vk_free(&device->vk.alloc, xe_syncs);
+
+ /* queue->sync was added as a signal sync above; when set, wait for it
+ * with no timeout before returning.
+ */
+ if (result == VK_SUCCESS && queue->sync) {
+ result = vk_sync_wait(&device->vk, queue->sync, 0,
+ VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
+ if (result != VK_SUCCESS)
+ result = vk_queue_set_lost(&queue->vk, "sync wait failed");
+ }
+
+ /* The utrace batch goes in as a separate exec after the main one */
+ if (result == VK_SUCCESS && utrace_submit)
+ result = xe_queue_exec_utrace_locked(queue, utrace_submit);
+
+ return result;
+}
diff --git a/lib/mesa/src/intel/vulkan/xe/anv_batch_chain.h b/lib/mesa/src/intel/vulkan/xe/anv_batch_chain.h
new file mode 100644
index 000000000..9ee877e04
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/xe/anv_batch_chain.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright © 2023 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include "vulkan/vulkan_core.h"
+#include "vk_sync.h"
+
+struct anv_queue;
+struct anv_bo;
+struct anv_cmd_buffer;
+struct anv_query_pool;
+struct anv_utrace_submit;
+
+/* Submits batch_bo on the queue's Xe engine and waits for the submission to
+ * signal a temporary sync object before returning.
+ */
+VkResult
+xe_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
+ uint32_t batch_bo_size);
+/* Submits cmd_buffer_count command buffers (may be 0) on the queue's Xe
+ * engine, waiting on `waits` and signaling `signals`.
+ */
+VkResult
+xe_queue_exec_locked(struct anv_queue *queue,
+ uint32_t wait_count,
+ const struct vk_sync_wait *waits,
+ uint32_t cmd_buffer_count,
+ struct anv_cmd_buffer **cmd_buffers,
+ uint32_t signal_count,
+ const struct vk_sync_signal *signals,
+ struct anv_query_pool *perf_query_pool,
+ uint32_t perf_query_pass);
+
+/* Submits the batch held by utrace_submit on the queue's Xe engine; the
+ * submission signals utrace_submit's sync.
+ */
+VkResult
+xe_queue_exec_utrace_locked(struct anv_queue *queue,
+ struct anv_utrace_submit *utrace_submit);
diff --git a/lib/mesa/src/intel/vulkan/xe/anv_device.c b/lib/mesa/src/intel/vulkan/xe/anv_device.c
new file mode 100644
index 000000000..a5827d968
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/xe/anv_device.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright © 2023 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "xe/anv_device.h"
+#include "anv_private.h"
+
+#include "drm-uapi/xe_drm.h"
+
+/**
+ * Destroy the Xe VM identified by device->vm_id.
+ *
+ * Returns true when the DRM_IOCTL_XE_VM_DESTROY ioctl succeeds.
+ */
+bool anv_xe_device_destroy_vm(struct anv_device *device)
+{
+   struct drm_xe_vm_destroy args = {
+      .vm_id = device->vm_id,
+   };
+   const int ret = intel_ioctl(device->fd, DRM_IOCTL_XE_VM_DESTROY, &args);
+
+   return ret == 0;
+}
+
+/**
+ * Create the Xe VM used by the device, with the scratch page flag set, and
+ * store its id in device->vm_id.
+ *
+ * Returns VK_SUCCESS, or VK_ERROR_INITIALIZATION_FAILED (through vk_errorf)
+ * when the ioctl fails; device->vm_id is left untouched in that case.
+ */
+VkResult anv_xe_device_setup_vm(struct anv_device *device)
+{
+   struct drm_xe_vm_create args = {
+      .flags = DRM_XE_VM_CREATE_SCRATCH_PAGE,
+   };
+
+   if (intel_ioctl(device->fd, DRM_IOCTL_XE_VM_CREATE, &args) == 0) {
+      device->vm_id = args.vm_id;
+      return VK_SUCCESS;
+   }
+
+   return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
+                    "vm creation failed");
+}
+
+/**
+ * Translate a Vulkan global queue priority into a DRM scheduler priority.
+ *
+ * LOW maps to MIN, MEDIUM to NORMAL and HIGH to HIGH. Any other value is
+ * treated as unreachable.
+ */
+enum drm_sched_priority
+anv_vk_priority_to_drm_sched_priority(VkQueueGlobalPriorityKHR vk_priority)
+{
+   if (vk_priority == VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR)
+      return DRM_SCHED_PRIORITY_MIN;
+
+   if (vk_priority == VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
+      return DRM_SCHED_PRIORITY_NORMAL;
+
+   if (vk_priority == VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR)
+      return DRM_SCHED_PRIORITY_HIGH;
+
+   unreachable("Invalid priority");
+   return DRM_SCHED_PRIORITY_MIN;
+}
+
+/**
+ * Translate a DRM scheduler priority into a Vulkan global queue priority.
+ *
+ * MIN maps to LOW, NORMAL to MEDIUM and HIGH to HIGH. Any other value is
+ * treated as unreachable.
+ */
+static VkQueueGlobalPriorityKHR
+drm_sched_priority_to_vk_priority(enum drm_sched_priority drm_sched_priority)
+{
+   if (drm_sched_priority == DRM_SCHED_PRIORITY_MIN)
+      return VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR;
+
+   if (drm_sched_priority == DRM_SCHED_PRIORITY_NORMAL)
+      return VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
+
+   if (drm_sched_priority == DRM_SCHED_PRIORITY_HIGH)
+      return VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
+
+   unreachable("Invalid drm_sched_priority");
+   return VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR;
+}
+
+/**
+ * Run a DRM_IOCTL_XE_DEVICE_QUERY for query_id and return the result in a
+ * freshly allocated buffer.
+ *
+ * The ioctl is issued twice: once without a data pointer, after which
+ * query.size is used as the allocation size, and once with the buffer
+ * attached. The caller owns the returned buffer and releases it with
+ * free(). Returns NULL when either ioctl or the allocation fails.
+ */
+static void *
+xe_query_alloc_fetch(struct anv_physical_device *device, uint32_t query_id)
+{
+   struct drm_xe_device_query query = {
+      .query = query_id,
+   };
+
+   if (intel_ioctl(device->local_fd, DRM_IOCTL_XE_DEVICE_QUERY, &query) != 0)
+      return NULL;
+
+   void *buffer = calloc(1, query.size);
+   if (buffer == NULL)
+      return NULL;
+
+   query.data = (uintptr_t)buffer;
+   if (intel_ioctl(device->local_fd, DRM_IOCTL_XE_DEVICE_QUERY, &query) == 0)
+      return buffer;
+
+   free(buffer);
+   return NULL;
+}
+
+/**
+ * Fill the Xe specific parameters of the physical device.
+ *
+ * The device config is queried from the kernel; max_context_priority is
+ * derived from its XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY entry and
+ * has_exec_timeline is set to true unconditionally.
+ *
+ * Returns VK_SUCCESS, or VK_ERROR_INITIALIZATION_FAILED (through vk_errorf)
+ * when the config query fails.
+ */
+VkResult
+anv_xe_physical_device_get_parameters(struct anv_physical_device *device)
+{
+   struct drm_xe_query_config *config =
+      xe_query_alloc_fetch(device, DRM_XE_DEVICE_QUERY_CONFIG);
+
+   if (config == NULL)
+      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
+                       "unable to query device config");
+
+   device->max_context_priority =
+      drm_sched_priority_to_vk_priority(config->info[XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY]);
+   device->has_exec_timeline = true;
+
+   free(config);
+   return VK_SUCCESS;
+}
+
+/**
+ * Check whether any of the device's queues has been banned by the kernel.
+ *
+ * Each queue's engine is asked for XE_ENGINE_GET_PROPERTY_BAN. The device
+ * is marked lost at the first queue for which the ioctl fails or reports a
+ * non-zero value; later queues are not examined.
+ *
+ * Returns VK_SUCCESS, or the vk_device_set_lost() result.
+ */
+VkResult
+anv_xe_device_check_status(struct vk_device *vk_device)
+{
+   struct anv_device *device = container_of(vk_device, struct anv_device, vk);
+
+   for (uint32_t q = 0; q < device->queue_count; q++) {
+      struct drm_xe_engine_get_property ban_query = {
+         .engine_id = device->queues[q].engine_id,
+         .property = XE_ENGINE_GET_PROPERTY_BAN,
+      };
+      const int ret = intel_ioctl(device->fd,
+                                  DRM_IOCTL_XE_ENGINE_GET_PROPERTY,
+                                  &ban_query);
+
+      if (ret || ban_query.value)
+         return vk_device_set_lost(&device->vk, "One or more queues banned");
+   }
+
+   return VK_SUCCESS;
+}
diff --git a/lib/mesa/src/intel/vulkan/xe/anv_device.h b/lib/mesa/src/intel/vulkan/xe/anv_device.h
new file mode 100644
index 000000000..669d5639c
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/xe/anv_device.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2023 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+
+#include "vulkan/vulkan_core.h"
+#include "vk_device.h"
+
+#include "drm-uapi/gpu_scheduler.h"
+
+struct anv_device;
+struct anv_physical_device;
+
+/* Destroys the device's Xe VM; returns true when the ioctl succeeds. */
+bool anv_xe_device_destroy_vm(struct anv_device *device);
+/* Creates the device's Xe VM and stores its id in the device. */
+VkResult anv_xe_device_setup_vm(struct anv_device *device);
+/* Marks the device lost if any queue's engine reports being banned. */
+VkResult anv_xe_device_check_status(struct vk_device *vk_device);
+
+/* Queries the Xe device config and fills the physical device's Xe specific
+ * parameters (exec timeline support, maximum context priority).
+ */
+VkResult
+anv_xe_physical_device_get_parameters(struct anv_physical_device *device);
+/* Maps a Vulkan global queue priority (LOW/MEDIUM/HIGH) to the matching DRM
+ * scheduler priority.
+ */
+enum drm_sched_priority
+anv_vk_priority_to_drm_sched_priority(VkQueueGlobalPriorityKHR vk_priority);
diff --git a/lib/mesa/src/intel/vulkan/xe/anv_kmd_backend.c b/lib/mesa/src/intel/vulkan/xe/anv_kmd_backend.c
new file mode 100644
index 000000000..46c4939e4
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/xe/anv_kmd_backend.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright © 2023 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <sys/mman.h>
+#include <xf86drm.h>
+
+#include "anv_private.h"
+
+#include "xe/anv_batch_chain.h"
+
+#include "drm-uapi/xe_drm.h"
+
+/**
+ * Create a GEM buffer object through DRM_IOCTL_XE_GEM_CREATE.
+ *
+ * The requested size is aligned up to device->info->mem_alignment. The
+ * create flags carry one bit per requested region instance, plus the
+ * scanout flag when ANV_BO_ALLOC_SCANOUT is set.
+ *
+ * Returns the GEM handle and stores the size passed to the kernel in
+ * *actual_size, or returns 0 (leaving *actual_size untouched) when the
+ * ioctl fails.
+ */
+static uint32_t
+xe_gem_create(struct anv_device *device,
+              const struct intel_memory_class_instance **regions,
+              uint16_t regions_count, uint64_t size,
+              enum anv_bo_alloc_flags alloc_flags,
+              uint64_t *actual_size)
+{
+   const bool is_external = (alloc_flags & ANV_BO_ALLOC_EXTERNAL) != 0;
+   uint32_t create_flags = 0;
+
+   if (alloc_flags & ANV_BO_ALLOC_SCANOUT)
+      create_flags = XE_GEM_CREATE_FLAG_SCANOUT;
+
+   for (uint16_t r = 0; r < regions_count; r++)
+      create_flags |= BITFIELD_BIT(regions[r]->instance);
+
+   struct drm_xe_gem_create gem_create = {
+      /* From xe_drm.h: If a VM is specified, this BO must:
+       * 1. Only ever be bound to that VM.
+       * 2. Cannot be exported as a PRIME fd.
+       *
+       * External BOs are therefore created without a VM.
+       */
+      .vm_id = is_external ? 0 : device->vm_id,
+      .size = align64(size, device->info->mem_alignment),
+      .flags = create_flags,
+   };
+
+   if (intel_ioctl(device->fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create))
+      return 0;
+
+   *actual_size = gem_create.size;
+   return gem_create.handle;
+}
+
+/**
+ * Close a GEM handle with DRM_IOCTL_GEM_CLOSE. The ioctl result is ignored.
+ */
+static void
+xe_gem_close(struct anv_device *device, uint32_t handle)
+{
+   struct drm_gem_close args = {
+      .handle = handle,
+   };
+
+   intel_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &args);
+}
+
+/**
+ * Map a buffer object into the CPU address space.
+ *
+ * The fake mmap offset for bo->gem_handle is fetched from the kernel and
+ * `size` bytes are mapped read/write, shared, from the device fd at that
+ * offset. The `offset` and `property_flags` arguments are not used here.
+ *
+ * Returns the mapping, or MAP_FAILED when the ioctl or mmap() fails.
+ */
+static void *
+xe_gem_mmap(struct anv_device *device, struct anv_bo *bo, uint64_t offset,
+            uint64_t size, VkMemoryPropertyFlags property_flags)
+{
+   struct drm_xe_gem_mmap_offset mmap_args = {
+      .handle = bo->gem_handle,
+   };
+
+   if (intel_ioctl(device->fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmap_args) != 0)
+      return MAP_FAILED;
+
+   void *cpu_map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                        device->fd, mmap_args.offset);
+   return cpu_map;
+}
+
+/**
+ * Run a single VM bind operation (`op`) for `bo` and wait for it.
+ *
+ * The bind covers bo->actual_size bytes at the 48-bit form of bo->offset in
+ * the device VM. For XE_VM_BIND_OP_UNMAP no GEM object is passed. A
+ * temporary sync object is signaled by the bind and waited on before
+ * returning.
+ *
+ * Returns 0 on success, otherwise the failing result of the sync object
+ * creation or of the bind ioctl.
+ * NOTE(review): the result of the wait ioctl is not checked, so a failed
+ * wait still reports success -- confirm that this is intended.
+ */
+static inline int
+xe_gem_vm_bind_op(struct anv_device *device, struct anv_bo *bo, uint32_t op)
+{
+   uint32_t bind_syncobj;
+   int ret = drmSyncobjCreate(device->fd, 0, &bind_syncobj);
+
+   if (ret)
+      return ret;
+
+   struct drm_xe_sync signal_sync = {
+      .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL,
+      .handle = bind_syncobj,
+   };
+   struct drm_xe_vm_bind bind_args = {
+      .vm_id = device->vm_id,
+      .num_binds = 1,
+      .bind = {
+         .obj = op == XE_VM_BIND_OP_UNMAP ? 0 : bo->gem_handle,
+         .obj_offset = 0,
+         .range = bo->actual_size,
+         .addr = intel_48b_address(bo->offset),
+         .op = op,
+      },
+      .num_syncs = 1,
+      .syncs = (uintptr_t)&signal_sync,
+   };
+
+   ret = intel_ioctl(device->fd, DRM_IOCTL_XE_VM_BIND, &bind_args);
+   if (ret == 0) {
+      struct drm_syncobj_wait wait = {
+         .handles = (uintptr_t)&bind_syncobj,
+         .timeout_nsec = INT64_MAX,
+         .count_handles = 1,
+      };
+
+      intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
+   }
+
+   drmSyncobjDestroy(device->fd, bind_syncobj);
+   return ret;
+}
+
+/* Binds bo into the device VM (XE_VM_BIND_OP_MAP); returns the result of
+ * xe_gem_vm_bind_op().
+ */
+static int xe_gem_vm_bind(struct anv_device *device, struct anv_bo *bo)
+{
+ return xe_gem_vm_bind_op(device, bo, XE_VM_BIND_OP_MAP);
+}
+
+/* Unbinds bo from the device VM (XE_VM_BIND_OP_UNMAP); returns the result
+ * of xe_gem_vm_bind_op().
+ */
+static int xe_gem_vm_unbind(struct anv_device *device, struct anv_bo *bo)
+{
+ return xe_gem_vm_bind_op(device, bo, XE_VM_BIND_OP_UNMAP);
+}
+
+/* Returns the Xe implementation of the KMD backend vtable. The table is a
+ * function-local static const object, so the returned pointer stays valid
+ * for the lifetime of the program and must not be freed.
+ */
+const struct anv_kmd_backend *
+anv_xe_kmd_backend_get(void)
+{
+ static const struct anv_kmd_backend xe_backend = {
+ .gem_create = xe_gem_create,
+ .gem_close = xe_gem_close,
+ .gem_mmap = xe_gem_mmap,
+ .gem_vm_bind = xe_gem_vm_bind,
+ .gem_vm_unbind = xe_gem_vm_unbind,
+ .execute_simple_batch = xe_execute_simple_batch,
+ .queue_exec_locked = xe_queue_exec_locked,
+ .queue_exec_trace = xe_queue_exec_utrace_locked,
+ };
+ return &xe_backend;
+}
diff --git a/lib/mesa/src/intel/vulkan/xe/anv_queue.c b/lib/mesa/src/intel/vulkan/xe/anv_queue.c
new file mode 100644
index 000000000..5c42435c7
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/xe/anv_queue.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright © 2023 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "xe/anv_queue.h"
+
+#include "anv_private.h"
+
+#include "common/xe/intel_engine.h"
+#include "common/intel_gem.h"
+
+#include "xe/anv_device.h"
+
+#include "drm-uapi/xe_drm.h"
+#include "drm-uapi/gpu_scheduler.h"
+
+/**
+ * Create the Xe engine (submission queue) backing an anv_queue.
+ *
+ * All HW engines whose class matches the queue family are offered to the
+ * kernel as placements for a width-1 engine bound to the device VM. The
+ * resulting id is stored in queue->engine_id, then the global priority
+ * requested through VkDeviceQueueGlobalPriorityCreateInfoKHR (MEDIUM when
+ * absent) is applied.
+ *
+ * Returns VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY, VK_ERROR_UNKNOWN when the
+ * engine cannot be created, or VK_ERROR_NOT_PERMITTED_KHR when the requested
+ * priority is refused (the engine is destroyed again in that case).
+ */
+VkResult
+anv_xe_create_engine(struct anv_device *device,
+                     struct anv_queue *queue,
+                     const VkDeviceQueueCreateInfo *pCreateInfo)
+{
+   struct anv_physical_device *physical = device->physical;
+   struct anv_queue_family *queue_family =
+      &physical->queue.families[pCreateInfo->queueFamilyIndex];
+   const struct intel_query_engine_info *engines = physical->engine_info;
+   struct drm_xe_engine_class_instance *instances;
+
+   /* The loop below may add one placement per HW engine, so the array must
+    * hold at least engines->num_engines entries; nothing in this function
+    * guarantees that the number of matching engines stays within
+    * queueCount.
+    */
+   const uint32_t max_placements =
+      engines->num_engines > queue_family->queueCount ?
+      engines->num_engines : queue_family->queueCount;
+
+   instances = vk_alloc(&device->vk.alloc,
+                        sizeof(*instances) * max_placements, 8,
+                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!instances)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Build a list of all compatible HW engines */
+   uint32_t count = 0;
+   for (uint32_t i = 0; i < engines->num_engines; i++) {
+      const struct intel_engine_class_instance engine = engines->engines[i];
+      if (engine.engine_class != queue_family->engine_class)
+         continue;
+
+      instances[count].engine_class = intel_engine_class_to_xe(engine.engine_class);
+      instances[count].engine_instance = engine.engine_instance;
+      /* TODO: handle gt_id, MTL and newer platforms will have media engines
+       * in a separate gt
+       */
+      instances[count++].gt_id = 0;
+   }
+
+   assert(device->vm_id != 0);
+   struct drm_xe_engine_create create = {
+      /* Allows KMD to pick one of those engines for the submission queue */
+      .instances = (uintptr_t)instances,
+      .vm_id = device->vm_id,
+      .width = 1,
+      .num_placements = count,
+   };
+   int ret = intel_ioctl(device->fd, DRM_IOCTL_XE_ENGINE_CREATE, &create);
+   /* The placement list is only needed for the duration of the ioctl */
+   vk_free(&device->vk.alloc, instances);
+   if (ret)
+      return vk_errorf(device, VK_ERROR_UNKNOWN, "Unable to create engine");
+
+   queue->engine_id = create.engine_id;
+
+   const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority =
+      vk_find_struct_const(pCreateInfo->pNext,
+                           DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
+   const VkQueueGlobalPriorityKHR priority = queue_priority ?
+                                             queue_priority->globalPriority :
+                                             VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
+
+   /* As per spec, the driver implementation may deny requests to acquire
+    * a priority above the default priority (MEDIUM) if the caller does not
+    * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR
+    * is returned.
+    */
+   if (physical->max_context_priority >= VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
+      if (priority > physical->max_context_priority)
+         goto priority_error;
+
+      struct drm_xe_engine_set_property engine_property = {
+         .engine_id = create.engine_id,
+         .property = XE_ENGINE_SET_PROPERTY_PRIORITY,
+         .value = anv_vk_priority_to_drm_sched_priority(priority),
+      };
+      ret = intel_ioctl(device->fd, DRM_IOCTL_XE_ENGINE_SET_PROPERTY,
+                        &engine_property);
+      /* A failure to set a priority at or below MEDIUM is tolerated */
+      if (ret && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
+         goto priority_error;
+   }
+
+   return VK_SUCCESS;
+
+priority_error:
+   anv_xe_destroy_engine(device, queue);
+   return vk_error(device, VK_ERROR_NOT_PERMITTED_KHR);
+}
+
+/**
+ * Destroy the Xe engine identified by queue->engine_id. The ioctl result is
+ * ignored.
+ */
+void
+anv_xe_destroy_engine(struct anv_device *device, struct anv_queue *queue)
+{
+   struct drm_xe_engine_destroy args = {
+      .engine_id = queue->engine_id,
+   };
+
+   intel_ioctl(device->fd, DRM_IOCTL_XE_ENGINE_DESTROY, &args);
+}
diff --git a/lib/mesa/src/intel/vulkan/xe/anv_queue.h b/lib/mesa/src/intel/vulkan/xe/anv_queue.h
new file mode 100644
index 000000000..646f0ef2f
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/xe/anv_queue.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright © 2023 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include "vulkan/vulkan_core.h"
+
+struct anv_device;
+struct anv_queue;
+
+/* Creates the Xe engine backing `queue` (stored in queue->engine_id) and
+ * applies the global priority requested in pCreateInfo's pNext chain.
+ */
+VkResult
+anv_xe_create_engine(struct anv_device *device,
+ struct anv_queue *queue,
+ const VkDeviceQueueCreateInfo *pCreateInfo);
+/* Destroys the Xe engine identified by queue->engine_id. */
+void
+anv_xe_destroy_engine(struct anv_device *device, struct anv_queue *queue);