summaryrefslogtreecommitdiff
path: root/lib/mesa/src/intel/vulkan/anv_device.c
diff options
context:
space:
mode:
authorJonathan Gray <jsg@cvs.openbsd.org>2022-09-02 05:47:02 +0000
committerJonathan Gray <jsg@cvs.openbsd.org>2022-09-02 05:47:02 +0000
commit0dbbf1e0708df85a357d70e2708c0a11aeb5480e (patch)
tree6656ff8eb8b15a2fc1c02888973caf618388cfd0 /lib/mesa/src/intel/vulkan/anv_device.c
parent5f66494d31f735486b8222ecfa0a0c9046e92543 (diff)
Merge Mesa 22.1.7
Diffstat (limited to 'lib/mesa/src/intel/vulkan/anv_device.c')
-rw-r--r--lib/mesa/src/intel/vulkan/anv_device.c1262
1 files changed, 681 insertions, 581 deletions
diff --git a/lib/mesa/src/intel/vulkan/anv_device.c b/lib/mesa/src/intel/vulkan/anv_device.c
index cd9159df8..617d4eed1 100644
--- a/lib/mesa/src/intel/vulkan/anv_device.c
+++ b/lib/mesa/src/intel/vulkan/anv_device.c
@@ -22,6 +22,7 @@
*/
#include <assert.h>
+#include <inttypes.h>
#include <stdbool.h>
#include <string.h>
#ifdef MAJOR_IN_MKDEV
@@ -52,18 +53,22 @@
#include "git_sha1.h"
#include "vk_util.h"
#include "vk_deferred_operation.h"
+#include "vk_drm_syncobj.h"
#include "common/intel_aux_map.h"
#include "common/intel_defines.h"
#include "common/intel_uuid.h"
#include "perf/intel_perf.h"
#include "genxml/gen7_pack.h"
+#include "genxml/genX_bits.h"
static const driOptionDescription anv_dri_options[] = {
DRI_CONF_SECTION_PERFORMANCE
+ DRI_CONF_ADAPTIVE_SYNC(true)
DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
+ DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_DEBUG
@@ -90,14 +95,14 @@ compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
{
char str[MAX_DEBUG_MESSAGE_LENGTH];
struct anv_device *device = (struct anv_device *)data;
- struct anv_instance *instance = device->physical->instance;
+ UNUSED struct anv_instance *instance = device->physical->instance;
va_list args;
va_start(args, fmt);
(void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
va_end(args);
- vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
+ //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
}
static void
@@ -122,7 +127,7 @@ compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
#ifdef ANDROID
#define ANV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#else
-#define ANV_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
+#define ANV_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
#endif
VkResult anv_EnumerateInstanceVersion(
@@ -139,6 +144,7 @@ static const struct vk_instance_extension_table instance_extensions = {
.KHR_external_semaphore_capabilities = true,
.KHR_get_physical_device_properties2 = true,
.EXT_debug_report = true,
+ .EXT_debug_utils = true,
#ifdef ANV_USE_WSI_PLATFORM
.KHR_get_surface_capabilities2 = true,
@@ -170,6 +176,12 @@ static void
get_device_extensions(const struct anv_physical_device *device,
struct vk_device_extension_table *ext)
{
+ const bool has_syncobj_wait =
+ (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;
+
+ const bool nv_mesh_shading_enabled =
+ env_var_as_boolean("ANV_EXPERIMENTAL_NV_MESH_SHADER", false);
+
*ext = (struct vk_device_extension_table) {
.KHR_8bit_storage = device->info.ver >= 8,
.KHR_16bit_storage = device->info.ver >= 8,
@@ -184,8 +196,9 @@ get_device_extensions(const struct anv_physical_device *device,
.KHR_device_group = true,
.KHR_draw_indirect_count = true,
.KHR_driver_properties = true,
- .KHR_external_fence = device->has_syncobj_wait,
- .KHR_external_fence_fd = device->has_syncobj_wait,
+ .KHR_dynamic_rendering = true,
+ .KHR_external_fence = has_syncobj_wait,
+ .KHR_external_fence_fd = has_syncobj_wait,
.KHR_external_memory = true,
.KHR_external_memory_fd = true,
.KHR_external_semaphore = true,
@@ -204,18 +217,18 @@ get_device_extensions(const struct anv_physical_device *device,
.KHR_maintenance4 = true,
.KHR_multiview = true,
.KHR_performance_query =
- device->use_softpin && device->perf &&
+ !anv_use_relocations(device) && device->perf &&
(device->perf->i915_perf_version >= 3 ||
INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
device->use_call_secondary,
.KHR_pipeline_executable_properties = true,
.KHR_push_descriptor = true,
+ .KHR_ray_query = device->info.has_ray_tracing,
.KHR_relaxed_block_layout = true,
.KHR_sampler_mirror_clamp_to_edge = true,
.KHR_sampler_ycbcr_conversion = true,
.KHR_separate_depth_stencil_layouts = true,
- .KHR_shader_atomic_int64 = device->info.ver >= 9 &&
- device->use_softpin,
+ .KHR_shader_atomic_int64 = device->info.ver >= 9,
.KHR_shader_clock = true,
.KHR_shader_draw_parameters = true,
.KHR_shader_float16_int8 = device->info.ver >= 8,
@@ -245,6 +258,7 @@ get_device_extensions(const struct anv_physical_device *device,
.EXT_conditional_rendering = device->info.verx10 >= 75,
.EXT_conservative_rasterization = device->info.ver >= 9,
.EXT_custom_border_color = device->info.ver >= 8,
+ .EXT_depth_clip_control = true,
.EXT_depth_clip_enable = true,
.EXT_descriptor_indexing = device->has_a64_buffer_access &&
device->has_bindless_images,
@@ -256,8 +270,12 @@ get_device_extensions(const struct anv_physical_device *device,
.EXT_external_memory_dma_buf = true,
.EXT_external_memory_host = true,
.EXT_fragment_shader_interlock = device->info.ver >= 9,
- .EXT_global_priority = device->has_context_priority,
+ .EXT_global_priority = device->max_context_priority >=
+ INTEL_CONTEXT_MEDIUM_PRIORITY,
+ .EXT_global_priority_query = device->max_context_priority >=
+ INTEL_CONTEXT_MEDIUM_PRIORITY,
.EXT_host_query_reset = true,
+ .EXT_image_2d_view_of_3d = true,
.EXT_image_robustness = true,
.EXT_image_drm_format_modifier = true,
.EXT_index_type_uint8 = true,
@@ -287,6 +305,7 @@ get_device_extensions(const struct anv_physical_device *device,
.EXT_shader_viewport_index_layer = true,
.EXT_subgroup_size_control = true,
.EXT_texel_buffer_alignment = true,
+ .EXT_tooling_info = true,
.EXT_transform_feedback = true,
.EXT_vertex_attribute_divisor = true,
.EXT_ycbcr_image_arrays = true,
@@ -302,6 +321,9 @@ get_device_extensions(const struct anv_physical_device *device,
.INTEL_shader_integer_functions2 = device->info.ver >= 8,
.EXT_multi_draw = true,
.NV_compute_shader_derivatives = true,
+ .NV_mesh_shader = device->info.has_mesh_shading &&
+ nv_mesh_shading_enabled,
+ .VALVE_mutable_descriptor_type = true,
};
}
@@ -345,7 +367,7 @@ anv_gather_meminfo(struct anv_physical_device *device, int fd, bool update)
sizeof(struct drm_i915_memory_region_info)];
struct drm_i915_query_memory_regions *mem_regions =
- intel_i915_query_alloc(fd, DRM_I915_QUERY_MEMORY_REGIONS);
+ intel_i915_query_alloc(fd, DRM_I915_QUERY_MEMORY_REGIONS, NULL);
if (mem_regions == NULL) {
if (device->info.has_local_mem) {
return vk_errorf(device, VK_ERROR_INCOMPATIBLE_DRIVER,
@@ -430,26 +452,6 @@ anv_update_meminfo(struct anv_physical_device *device, int fd)
static VkResult
anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
{
- if (anv_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
- &device->gtt_size) == -1) {
- /* If, for whatever reason, we can't actually get the GTT size from the
- * kernel (too old?) fall back to the aperture size.
- */
- anv_perf_warn(VK_LOG_NO_OBJS(&device->instance->vk),
- "Failed to get I915_CONTEXT_PARAM_GTT_SIZE: %m");
-
- if (intel_get_aperture_size(fd, &device->gtt_size) == -1) {
- return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
- "failed to get aperture size: %m");
- }
- }
-
- /* We only allow 48-bit addresses with softpin because knowing the actual
- * address is required for the vertex cache flush workaround.
- */
- device->supports_48bit_addresses = (device->info.ver >= 8) &&
- device->gtt_size > (4ULL << 30 /* GiB */);
-
VkResult result = anv_init_meminfo(device, fd);
if (result != VK_SUCCESS)
return result;
@@ -574,8 +576,8 @@ anv_physical_device_init_uuids(struct anv_physical_device *device)
*/
_mesa_sha1_init(&sha1_ctx);
_mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
- _mesa_sha1_update(&sha1_ctx, &device->info.chipset_id,
- sizeof(device->info.chipset_id));
+ _mesa_sha1_update(&sha1_ctx, &device->info.pci_device_id,
+ sizeof(device->info.pci_device_id));
_mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
sizeof(device->always_use_bindless));
_mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access,
@@ -588,7 +590,7 @@ anv_physical_device_init_uuids(struct anv_physical_device *device)
memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
- intel_uuid_compute_device_id(device->device_uuid, &device->isl_dev, VK_UUID_SIZE);
+ intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);
return VK_SUCCESS;
}
@@ -599,7 +601,7 @@ anv_physical_device_init_disk_cache(struct anv_physical_device *device)
#ifdef ENABLE_SHADER_CACHE
char renderer[10];
ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
- device->info.chipset_id);
+ device->info.pci_device_id);
assert(len == sizeof(renderer) - 2);
char timestamp[41];
@@ -689,7 +691,8 @@ anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
if (pdevice->engine_info) {
int gc_count =
- anv_gem_count_engines(pdevice->engine_info, I915_ENGINE_CLASS_RENDER);
+ intel_gem_count_engines(pdevice->engine_info,
+ I915_ENGINE_CLASS_RENDER);
int g_count = 0;
int c_count = 0;
@@ -769,11 +772,11 @@ anv_physical_device_try_create(struct anv_instance *instance,
}
bool is_alpha = true;
- if (devinfo.is_haswell) {
+ if (devinfo.platform == INTEL_PLATFORM_HSW) {
mesa_logw("Haswell Vulkan support is incomplete");
- } else if (devinfo.ver == 7 && !devinfo.is_baytrail) {
+ } else if (devinfo.platform == INTEL_PLATFORM_IVB) {
mesa_logw("Ivy Bridge Vulkan support is incomplete");
- } else if (devinfo.ver == 7 && devinfo.is_baytrail) {
+ } else if (devinfo.platform == INTEL_PLATFORM_BYT) {
mesa_logw("Bay Trail Vulkan support is incomplete");
} else if (devinfo.ver >= 8 && devinfo.ver <= 12) {
/* Gfx8-12 fully supported */
@@ -813,11 +816,6 @@ anv_physical_device_try_create(struct anv_instance *instance,
device->info = devinfo;
device->is_alpha = is_alpha;
- device->pci_info.domain = drm_device->businfo.pci->domain;
- device->pci_info.bus = drm_device->businfo.pci->bus;
- device->pci_info.device = drm_device->businfo.pci->dev;
- device->pci_info.function = drm_device->businfo.pci->func;
-
device->cmd_parser_version = -1;
if (device->info.ver == 7) {
device->cmd_parser_version =
@@ -848,7 +846,10 @@ anv_physical_device_try_create(struct anv_instance *instance,
goto fail_base;
}
- if (device->info.ver >= 8 && !device->info.is_cherryview &&
+ device->use_relocations = device->info.ver < 8 ||
+ device->info.platform == INTEL_PLATFORM_CHV;
+
+ if (!device->use_relocations &&
!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN)) {
result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
"kernel missing softpin");
@@ -863,12 +864,28 @@ anv_physical_device_try_create(struct anv_instance *instance,
device->has_exec_async = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC);
device->has_exec_capture = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE);
- device->has_exec_fence = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE);
- device->has_syncobj_wait = anv_gem_supports_syncobj_wait(fd);
- device->has_syncobj_wait_available =
- anv_gem_get_drm_cap(fd, DRM_CAP_SYNCOBJ_TIMELINE) != 0;
- device->has_context_priority = anv_gem_has_context_priority(fd);
+ /* Start with medium; sorted low to high */
+ const int priorities[] = {
+ INTEL_CONTEXT_MEDIUM_PRIORITY,
+ INTEL_CONTEXT_HIGH_PRIORITY,
+ INTEL_CONTEXT_REALTIME_PRIORITY,
+ };
+ device->max_context_priority = INT_MIN;
+ for (unsigned i = 0; i < ARRAY_SIZE(priorities); i++) {
+ if (!anv_gem_has_context_priority(fd, priorities[i]))
+ break;
+ device->max_context_priority = priorities[i];
+ }
+
+ device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
+ device->info.aperture_bytes;
+
+ /* We only allow 48-bit addresses with softpin because knowing the actual
+ * address is required for the vertex cache flush workaround.
+ */
+ device->supports_48bit_addresses = (device->info.ver >= 8) &&
+ device->gtt_size > (4ULL << 30 /* GiB */);
/* Initialize memory regions struct to 0. */
memset(&device->vram, 0, sizeof(device->vram));
@@ -878,9 +895,8 @@ anv_physical_device_try_create(struct anv_instance *instance,
if (result != VK_SUCCESS)
goto fail_base;
- device->use_softpin = device->info.ver >= 8 &&
- !device->info.is_cherryview;
- assert(device->use_softpin == device->supports_48bit_addresses);
+ assert(device->supports_48bit_addresses == !device->use_relocations);
+ device->use_softpin = !device->use_relocations;
device->has_context_isolation =
anv_gem_get_param(fd, I915_PARAM_HAS_CONTEXT_ISOLATION);
@@ -890,8 +906,24 @@ anv_physical_device_try_create(struct anv_instance *instance,
if (env_var_as_boolean("ANV_QUEUE_THREAD_DISABLE", false))
device->has_exec_timeline = false;
- device->has_thread_submit =
- device->has_syncobj_wait_available && device->has_exec_timeline;
+ unsigned st_idx = 0;
+
+ device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
+ if (!device->has_exec_timeline)
+ device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
+ device->sync_types[st_idx++] = &device->sync_syncobj_type;
+
+ if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
+ device->sync_types[st_idx++] = &anv_bo_sync_type;
+
+ if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
+ device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
+ device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
+ }
+
+ device->sync_types[st_idx++] = NULL;
+ assert(st_idx <= ARRAY_SIZE(device->sync_types));
+ device->vk.supported_sync_types = device->sync_types;
device->always_use_bindless =
env_var_as_boolean("ANV_ALWAYS_BINDLESS", false);
@@ -917,7 +949,8 @@ anv_physical_device_try_create(struct anv_instance *instance,
*/
device->has_bindless_samplers = device->info.ver >= 8;
- device->has_implicit_ccs = device->info.has_aux_map;
+ device->has_implicit_ccs = device->info.has_aux_map ||
+ device->info.verx10 >= 125;
/* Check if we can read the GPU timestamp register from the CPU */
uint64_t u64_ignore;
@@ -940,28 +973,12 @@ anv_physical_device_try_create(struct anv_instance *instance,
}
device->compiler->shader_debug_log = compiler_debug_log;
device->compiler->shader_perf_log = compiler_perf_log;
- device->compiler->supports_pull_constants = false;
device->compiler->constant_buffer_0_is_relative =
device->info.ver < 8 || !device->has_context_isolation;
device->compiler->supports_shader_constants = true;
- device->compiler->compact_params = false;
device->compiler->indirect_ubos_use_sampler = device->info.ver < 12;
- /* Broadwell PRM says:
- *
- * "Before Gfx8, there was a historical configuration control field to
- * swizzle address bit[6] for in X/Y tiling modes. This was set in three
- * different places: TILECTL[1:0], ARB_MODE[5:4], and
- * DISP_ARB_CTL[14:13].
- *
- * For Gfx8 and subsequent generations, the swizzle fields are all
- * reserved, and the CPU's memory controller performs all address
- * swizzling modifications."
- */
- bool swizzled =
- device->info.ver < 8 && anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
-
- isl_device_init(&device->isl_dev, &device->info, swizzled);
+ isl_device_init(&device->isl_dev, &device->info);
result = anv_physical_device_init_uuids(device);
if (result != VK_SUCCESS)
@@ -986,6 +1003,8 @@ anv_physical_device_try_create(struct anv_instance *instance,
device->engine_info = anv_gem_get_engine_info(fd);
anv_physical_device_init_queue_families(device);
+ device->local_fd = fd;
+
result = anv_init_wsi(device);
if (result != VK_SUCCESS)
goto fail_engine_info;
@@ -996,8 +1015,6 @@ anv_physical_device_try_create(struct anv_instance *instance,
get_device_extensions(device, &device->vk.supported_extensions);
- device->local_fd = fd;
-
anv_genX(&device->info, init_physical_device_state)(device);
*device_out = device;
@@ -1081,6 +1098,9 @@ anv_init_dri_options(struct anv_instance *instance)
instance->vk.app_info.app_version,
instance->vk.app_info.engine_name,
instance->vk.app_info.engine_version);
+
+ instance->assume_full_subgroups =
+ driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups");
}
VkResult anv_CreateInstance(
@@ -1124,6 +1144,8 @@ VkResult anv_CreateInstance(
anv_init_dri_options(instance);
+ intel_driver_ds_init();
+
*pInstance = anv_instance_to_handle(instance);
return VK_SUCCESS;
@@ -1201,7 +1223,8 @@ VkResult anv_EnumeratePhysicalDevices(
VkPhysicalDevice* pPhysicalDevices)
{
ANV_FROM_HANDLE(anv_instance, instance, _instance);
- VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount);
+ VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out,
+ pPhysicalDevices, pPhysicalDeviceCount);
VkResult result = anv_enumerate_physical_devices(instance);
if (result != VK_SUCCESS)
@@ -1209,7 +1232,7 @@ VkResult anv_EnumeratePhysicalDevices(
list_for_each_entry(struct anv_physical_device, pdevice,
&instance->physical_devices, link) {
- vk_outarray_append(&out, i) {
+ vk_outarray_append_typed(VkPhysicalDevice, &out, i) {
*i = anv_physical_device_to_handle(pdevice);
}
}
@@ -1223,8 +1246,9 @@ VkResult anv_EnumeratePhysicalDeviceGroups(
VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties)
{
ANV_FROM_HANDLE(anv_instance, instance, _instance);
- VK_OUTARRAY_MAKE(out, pPhysicalDeviceGroupProperties,
- pPhysicalDeviceGroupCount);
+ VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out,
+ pPhysicalDeviceGroupProperties,
+ pPhysicalDeviceGroupCount);
VkResult result = anv_enumerate_physical_devices(instance);
if (result != VK_SUCCESS)
@@ -1232,7 +1256,7 @@ VkResult anv_EnumeratePhysicalDeviceGroups(
list_for_each_entry(struct anv_physical_device, pdevice,
&instance->physical_devices, link) {
- vk_outarray_append(&out, p) {
+ vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p) {
p->physicalDeviceCount = 1;
memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
p->physicalDevices[0] = anv_physical_device_to_handle(pdevice);
@@ -1279,7 +1303,7 @@ void anv_GetPhysicalDeviceFeatures(
.multiViewport = true,
.samplerAnisotropy = true,
.textureCompressionETC2 = pdevice->info.ver >= 8 ||
- pdevice->info.is_baytrail,
+ pdevice->info.platform == INTEL_PLATFORM_BYT,
.textureCompressionASTC_LDR = has_astc_ldr,
.textureCompressionBC = true,
.occlusionQueryPrecise = true,
@@ -1352,8 +1376,7 @@ anv_get_physical_device_features_1_2(struct anv_physical_device *pdevice,
f->storageBuffer8BitAccess = pdevice->info.ver >= 8;
f->uniformAndStorageBuffer8BitAccess = pdevice->info.ver >= 8;
f->storagePushConstant8 = pdevice->info.ver >= 8;
- f->shaderBufferInt64Atomics = pdevice->info.ver >= 9 &&
- pdevice->use_softpin;
+ f->shaderBufferInt64Atomics = pdevice->info.ver >= 9;
f->shaderSharedInt64Atomics = false;
f->shaderFloat16 = pdevice->info.ver >= 8;
f->shaderInt8 = pdevice->info.ver >= 8;
@@ -1364,7 +1387,7 @@ anv_get_physical_device_features_1_2(struct anv_physical_device *pdevice,
f->shaderInputAttachmentArrayDynamicIndexing = false;
f->shaderUniformTexelBufferArrayDynamicIndexing = descIndexing;
f->shaderStorageTexelBufferArrayDynamicIndexing = descIndexing;
- f->shaderUniformBufferArrayNonUniformIndexing = descIndexing;
+ f->shaderUniformBufferArrayNonUniformIndexing = false;
f->shaderSampledImageArrayNonUniformIndexing = descIndexing;
f->shaderStorageBufferArrayNonUniformIndexing = descIndexing;
f->shaderStorageImageArrayNonUniformIndexing = descIndexing;
@@ -1401,6 +1424,29 @@ anv_get_physical_device_features_1_2(struct anv_physical_device *pdevice,
f->subgroupBroadcastDynamicId = true;
}
+static void
+anv_get_physical_device_features_1_3(struct anv_physical_device *pdevice,
+ VkPhysicalDeviceVulkan13Features *f)
+{
+ assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES);
+
+ f->robustImageAccess = true;
+ f->inlineUniformBlock = true;
+ f->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
+ f->pipelineCreationCacheControl = true;
+ f->privateData = true;
+ f->shaderDemoteToHelperInvocation = true;
+ f->shaderTerminateInvocation = true;
+ f->subgroupSizeControl = true;
+ f->computeFullSubgroups = true;
+ f->synchronization2 = true;
+ f->textureCompressionASTC_HDR = false;
+ f->shaderZeroInitializeWorkgroupMemory = true;
+ f->dynamicRendering = true;
+ f->shaderIntegerDotProduct = true;
+ f->maintenance4 = true;
+}
+
void anv_GetPhysicalDeviceFeatures2(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures2* pFeatures)
@@ -1418,11 +1464,18 @@ void anv_GetPhysicalDeviceFeatures2(
};
anv_get_physical_device_features_1_2(pdevice, &core_1_2);
+ VkPhysicalDeviceVulkan13Features core_1_3 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
+ };
+ anv_get_physical_device_features_1_3(pdevice, &core_1_3);
+
vk_foreach_struct(ext, pFeatures->pNext) {
if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1))
continue;
if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2))
continue;
+ if (vk_get_physical_device_core_1_3_feature_ext(ext, &core_1_3))
+ continue;
switch (ext->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
@@ -1433,7 +1486,6 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
-
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR: {
VkPhysicalDeviceAccelerationStructureFeaturesKHR *features = (void *)ext;
features->accelerationStructure = false;
@@ -1460,6 +1512,14 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT: {
+ VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *features =
+ (VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *)ext;
+ features->image2DViewOf3D = true;
+ features->sampler2DViewOf3D = pdevice->info.ver >= 9;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
(VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
@@ -1491,6 +1551,13 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR: {
+ VkPhysicalDeviceDynamicRenderingFeaturesKHR *features =
+ (VkPhysicalDeviceDynamicRenderingFeaturesKHR *)ext;
+ features->dynamicRendering = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: {
VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *features =
(VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *)ext;
@@ -1500,19 +1567,22 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_EXT: {
+ VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *features =
+ (VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *)ext;
+ features->globalPriorityQuery = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
(VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
features->attachmentFragmentShadingRate = false;
features->pipelineFragmentShadingRate = true;
- features->primitiveFragmentShadingRate = false;
- break;
- }
-
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
- VkPhysicalDeviceImageRobustnessFeaturesEXT *features =
- (VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;
- features->robustImageAccess = true;
+ features->primitiveFragmentShadingRate =
+ pdevice->info.has_coarse_pixel_primitive_and_cb;
+ features->attachmentFragmentShadingRate =
+ pdevice->info.has_coarse_pixel_primitive_and_cb;
break;
}
@@ -1523,18 +1593,14 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
- VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
- (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
- features->inlineUniformBlock = true;
- features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
- break;
- }
-
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
(VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
- features->rectangularLines = true;
+ /* Rectangular lines must use the strict algorithm, which is not
+ * supported for wide lines prior to ICL. See rasterization_mode for
+ * details and how the HW states are programmed.
+ */
+ features->rectangularLines = pdevice->info.ver >= 10;
features->bresenhamLines = true;
/* Support for Smooth lines with MSAA was removed on gfx11. From the
* BSpec section "Multisample ModesState" table for "AA Line Support
@@ -1558,6 +1624,21 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV: {
+ VkPhysicalDeviceMeshShaderFeaturesNV *features =
+ (VkPhysicalDeviceMeshShaderFeaturesNV *)ext;
+ features->taskShader = pdevice->vk.supported_extensions.NV_mesh_shader;
+ features->meshShader = pdevice->vk.supported_extensions.NV_mesh_shader;
+ break;
+ }
+
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
+ VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
+ (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
+ features->mutableDescriptorType = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
(VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
@@ -1567,13 +1648,6 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
- VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features =
- (VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *)ext;
- features->pipelineCreationCacheControl = true;
- break;
- }
-
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
(VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
@@ -1581,12 +1655,6 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {
- VkPhysicalDevicePrivateDataFeaturesEXT *features = (void *)ext;
- features->privateData = true;
- break;
- }
-
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
(VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
@@ -1595,6 +1663,12 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR: {
+ VkPhysicalDeviceRayQueryFeaturesKHR *features = (void *)ext;
+ features->rayQuery = pdevice->info.has_ray_tracing;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
features->robustBufferAccess2 = true;
@@ -1607,7 +1681,8 @@ void anv_GetPhysicalDeviceFeatures2(
VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features = (void *)ext;
features->shaderBufferFloat32Atomics = true;
features->shaderBufferFloat32AtomicAdd = pdevice->info.has_lsc;
- features->shaderBufferFloat64Atomics = pdevice->info.has_lsc;
+ features->shaderBufferFloat64Atomics =
+ pdevice->info.has_64bit_float && pdevice->info.has_lsc;
features->shaderBufferFloat64AtomicAdd = false;
features->shaderSharedFloat32Atomics = true;
features->shaderSharedFloat32AtomicAdd = false;
@@ -1626,7 +1701,8 @@ void anv_GetPhysicalDeviceFeatures2(
features->shaderBufferFloat16AtomicAdd = false;
features->shaderBufferFloat16AtomicMinMax = false;
features->shaderBufferFloat32AtomicMinMax = pdevice->info.ver >= 9;
- features->shaderBufferFloat64AtomicMinMax = pdevice->info.has_lsc;
+ features->shaderBufferFloat64AtomicMinMax =
+ pdevice->info.has_64bit_float && pdevice->info.has_lsc;
features->shaderSharedFloat16Atomics = false;
features->shaderSharedFloat16AtomicAdd = false;
features->shaderSharedFloat16AtomicMinMax = false;
@@ -1637,12 +1713,6 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
- VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features = (void *)ext;
- features->shaderDemoteToHelperInvocation = true;
- break;
- }
-
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
VkPhysicalDeviceShaderClockFeaturesKHR *features =
(VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
@@ -1658,13 +1728,6 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_FEATURES_KHR: {
- VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR *features =
- (VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR *)ext;
- features->shaderIntegerDotProduct = true;
- break;
- };
-
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR: {
VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *features =
(VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *)ext;
@@ -1672,28 +1735,6 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES_KHR: {
- VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *features =
- (VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *)ext;
- features->shaderTerminateInvocation = true;
- break;
- }
-
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
- VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
- (VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
- features->subgroupSizeControl = true;
- features->computeFullSubgroups = true;
- break;
- }
-
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR: {
- VkPhysicalDeviceSynchronization2FeaturesKHR *features =
- (VkPhysicalDeviceSynchronization2FeaturesKHR *)ext;
- features->synchronization2 = true;
- break;
- }
-
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
(VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
@@ -1750,13 +1791,6 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES_KHR: {
- VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *features =
- (VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *)ext;
- features->shaderZeroInitializeWorkgroupMemory = true;
- break;
- }
-
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
VkPhysicalDeviceMultiDrawFeaturesEXT *features = (VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
features->multiDraw = true;
@@ -1771,6 +1805,13 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT: {
+ VkPhysicalDeviceDepthClipControlFeaturesEXT *features =
+ (VkPhysicalDeviceDepthClipControlFeaturesEXT *)ext;
+ features->depthClipControl = true;
+ break;
+ }
+
default:
anv_debug_ignored_stype(ext->sType);
break;
@@ -1809,7 +1850,8 @@ void anv_GetPhysicalDeviceProperties(
pdevice->has_bindless_images && pdevice->has_a64_buffer_access
? UINT32_MAX : MAX_BINDING_TABLE_SIZE - MAX_RTS - 1;
- const uint32_t max_workgroup_size = 32 * devinfo->max_cs_workgroup_threads;
+ const uint32_t max_workgroup_size =
+ MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
VkSampleCountFlags sample_counts =
isl_device_get_sample_counts(&pdevice->isl_dev);
@@ -1822,12 +1864,12 @@ void anv_GetPhysicalDeviceProperties(
.maxImageDimensionCube = (1 << 14),
.maxImageArrayLayers = (1 << 11),
.maxTexelBufferElements = 128 * 1024 * 1024,
- .maxUniformBufferRange = (1ul << 27),
+ .maxUniformBufferRange = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
.maxStorageBufferRange = pdevice->isl_dev.max_buffer_size,
.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
.maxMemoryAllocationCount = UINT32_MAX,
.maxSamplerAllocationCount = 64 * 1024,
- .bufferImageGranularity = 64, /* A cache line */
+ .bufferImageGranularity = 1,
.sparseAddressSpaceSize = 0,
.maxBoundDescriptorSets = MAX_SETS,
.maxPerStageDescriptorSamplers = max_samplers,
@@ -1845,10 +1887,22 @@ void anv_GetPhysicalDeviceProperties(
.maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
.maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */
.maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
- .maxVertexInputAttributes = MAX_VBS,
+ .maxVertexInputAttributes = MAX_VES,
.maxVertexInputBindings = MAX_VBS,
+ /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
+ *
+ * VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
+ */
.maxVertexInputAttributeOffset = 2047,
- .maxVertexInputBindingStride = 2048,
+ /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
+ *
+ * VERTEX_BUFFER_STATE::Buffer Pitch: [0,2048]
+ *
+ * Skylake PRMs: Volume 2d: Command Reference: Structures:
+ *
+ * VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
+ */
+ .maxVertexInputBindingStride = devinfo->ver < 9 ? 2048 : 4095,
.maxVertexOutputComponents = 128,
.maxTessellationGenerationLevel = 64,
.maxTessellationPatchSize = 32,
@@ -1866,7 +1920,7 @@ void anv_GetPhysicalDeviceProperties(
.maxFragmentInputComponents = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
.maxFragmentOutputAttachments = 8,
.maxFragmentDualSrcAttachments = 1,
- .maxFragmentCombinedOutputResources = 8,
+ .maxFragmentCombinedOutputResources = MAX_RTS + max_ssbos + max_images,
.maxComputeSharedMemorySize = 64 * 1024,
.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
.maxComputeWorkGroupInvocations = max_workgroup_size,
@@ -1926,7 +1980,7 @@ void anv_GetPhysicalDeviceProperties(
* Since the Windows driver does the same, it's probably fair to assume
* that no one needs more than this.
*/
- .lineWidthRange = { 0.0, 7.9921875 },
+ .lineWidthRange = { 0.0, devinfo->ver >= 9 ? 8.0 : 7.9921875 },
.pointSizeGranularity = (1.0 / 8.0),
.lineWidthGranularity = (1.0 / 128.0),
.strictLines = false,
@@ -1940,7 +1994,7 @@ void anv_GetPhysicalDeviceProperties(
.apiVersion = ANV_API_VERSION,
.driverVersion = vk_get_driver_version(),
.vendorID = 0x8086,
- .deviceID = pdevice->info.chipset_id,
+ .deviceID = pdevice->info.pci_device_id,
.deviceType = pdevice->info.has_local_mem ?
VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
@@ -1980,6 +2034,10 @@ anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
VK_SHADER_STAGE_CALLABLE_BIT_KHR;
}
+ if (pdevice->vk.supported_extensions.NV_mesh_shader) {
+ scalar_stages |= VK_SHADER_STAGE_TASK_BIT_NV |
+ VK_SHADER_STAGE_MESH_BIT_NV;
+ }
p->subgroupSupportedStages = scalar_stages;
p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
VK_SUBGROUP_FEATURE_VOTE_BIT |
@@ -2036,7 +2094,7 @@ anv_get_physical_device_properties_1_2(struct anv_physical_device *pdevice,
else {
p->conformanceVersion = (VkConformanceVersionKHR) {
.major = 1,
- .minor = 2,
+ .minor = 3,
.subminor = 0,
.patch = 0,
};
@@ -2139,6 +2197,88 @@ anv_get_physical_device_properties_1_2(struct anv_physical_device *pdevice,
isl_device_get_sample_counts(&pdevice->isl_dev);
}
+static void
+anv_get_physical_device_properties_1_3(struct anv_physical_device *pdevice,
+ VkPhysicalDeviceVulkan13Properties *p)
+{
+ assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES);
+
+ p->minSubgroupSize = 8;
+ p->maxSubgroupSize = 32;
+ p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
+ p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT |
+ VK_SHADER_STAGE_TASK_BIT_NV |
+ VK_SHADER_STAGE_MESH_BIT_NV;
+
+ p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
+ p->maxPerStageDescriptorInlineUniformBlocks =
+ MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
+ p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
+ MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
+ p->maxDescriptorSetInlineUniformBlocks =
+ MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
+ p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
+ MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
+ p->maxInlineUniformTotalSize = UINT16_MAX;
+
+ p->integerDotProduct8BitUnsignedAccelerated = false;
+ p->integerDotProduct8BitSignedAccelerated = false;
+ p->integerDotProduct8BitMixedSignednessAccelerated = false;
+ p->integerDotProduct4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
+ p->integerDotProduct4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
+ p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
+ p->integerDotProduct16BitUnsignedAccelerated = false;
+ p->integerDotProduct16BitSignedAccelerated = false;
+ p->integerDotProduct16BitMixedSignednessAccelerated = false;
+ p->integerDotProduct32BitUnsignedAccelerated = false;
+ p->integerDotProduct32BitSignedAccelerated = false;
+ p->integerDotProduct32BitMixedSignednessAccelerated = false;
+ p->integerDotProduct64BitUnsignedAccelerated = false;
+ p->integerDotProduct64BitSignedAccelerated = false;
+ p->integerDotProduct64BitMixedSignednessAccelerated = false;
+ p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
+ p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
+ p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
+ p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
+ p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
+ p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
+ p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
+ p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
+ p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
+ p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
+ p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
+ p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
+ p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
+ p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
+ p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
+
+ /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
+ * Base Address:
+ *
+ * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
+ * specifies the base address of the first element of the surface,
+ * computed in software by adding the surface base address to the
+ * byte offset of the element in the buffer. The base address must
+ * be aligned to element size."
+ *
+ * The typed dataport messages require that things be texel aligned.
+ * Otherwise, we may just load/store the wrong data or, in the worst
+ * case, there may be hangs.
+ */
+ p->storageTexelBufferOffsetAlignmentBytes = 16;
+ p->storageTexelBufferOffsetSingleTexelAlignment = true;
+
+ /* The sampler, however, is much more forgiving and it can handle
+ * arbitrary byte alignment for linear and buffer surfaces. It's
+ * hard to find a good PRM citation for this but years of empirical
+ * experience demonstrate that this is true.
+ */
+ p->uniformTexelBufferOffsetAlignmentBytes = 1;
+ p->uniformTexelBufferOffsetSingleTexelAlignment = false;
+
+ p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
+}
+
void anv_GetPhysicalDeviceProperties2(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties2* pProperties)
@@ -2157,11 +2297,18 @@ void anv_GetPhysicalDeviceProperties2(
};
anv_get_physical_device_properties_1_2(pdevice, &core_1_2);
+ VkPhysicalDeviceVulkan13Properties core_1_3 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
+ };
+ anv_get_physical_device_properties_1_3(pdevice, &core_1_3);
+
vk_foreach_struct(ext, pProperties->pNext) {
if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
continue;
if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
continue;
+ if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
+ continue;
switch (ext->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR: {
@@ -2211,31 +2358,51 @@ void anv_GetPhysicalDeviceProperties2(
break;
}
-
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
(VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
- /* Those must be 0 if attachmentFragmentShadingRate is not
- * supported.
- */
- props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
- props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
- props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
-
- props->primitiveFragmentShadingRateWithMultipleViewports = false;
- props->layeredShadingRateAttachments = false;
- props->fragmentShadingRateNonTrivialCombinerOps = false;
+ props->primitiveFragmentShadingRateWithMultipleViewports =
+ pdevice->info.has_coarse_pixel_primitive_and_cb;
+ props->layeredShadingRateAttachments = pdevice->info.has_coarse_pixel_primitive_and_cb;
+ props->fragmentShadingRateNonTrivialCombinerOps =
+ pdevice->info.has_coarse_pixel_primitive_and_cb;
props->maxFragmentSize = (VkExtent2D) { 4, 4 };
- props->maxFragmentSizeAspectRatio = 4;
- props->maxFragmentShadingRateCoverageSamples = 4 * 4 * 16;
- props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_16_BIT;
+ props->maxFragmentSizeAspectRatio =
+ pdevice->info.has_coarse_pixel_primitive_and_cb ?
+ 2 : 4;
+ props->maxFragmentShadingRateCoverageSamples = 4 * 4 *
+ (pdevice->info.has_coarse_pixel_primitive_and_cb ? 4 : 16);
+ props->maxFragmentShadingRateRasterizationSamples =
+ pdevice->info.has_coarse_pixel_primitive_and_cb ?
+ VK_SAMPLE_COUNT_4_BIT : VK_SAMPLE_COUNT_16_BIT;
props->fragmentShadingRateWithShaderDepthStencilWrites = false;
props->fragmentShadingRateWithSampleMask = true;
props->fragmentShadingRateWithShaderSampleMask = false;
props->fragmentShadingRateWithConservativeRasterization = true;
props->fragmentShadingRateWithFragmentShaderInterlock = true;
props->fragmentShadingRateWithCustomSampleLocations = true;
- props->fragmentShadingRateStrictMultiplyCombiner = false;
+
+ /* Fix in DG2_G10_C0 and DG2_G11_B0. Consider any other Sku as having
+ * the fix.
+ */
+ props->fragmentShadingRateStrictMultiplyCombiner =
+ pdevice->info.platform == INTEL_PLATFORM_DG2_G10 ?
+ pdevice->info.revision >= 8 :
+ pdevice->info.platform == INTEL_PLATFORM_DG2_G11 ?
+ pdevice->info.revision >= 4 : true;
+
+ if (pdevice->info.has_coarse_pixel_primitive_and_cb) {
+ props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
+ props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
+ props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
+ } else {
+ /* Those must be 0 if attachmentFragmentShadingRate is not
+ * supported.
+ */
+ props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
+ props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
+ props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
+ }
break;
}
@@ -2262,21 +2429,6 @@ void anv_GetPhysicalDeviceProperties2(
break;
}
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
- VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
- (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
- props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
- props->maxPerStageDescriptorInlineUniformBlocks =
- MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
- props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
- MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
- props->maxDescriptorSetInlineUniformBlocks =
- MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
- props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
- MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
- break;
- }
-
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
(VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
@@ -2303,13 +2455,73 @@ void anv_GetPhysicalDeviceProperties2(
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV: {
+ VkPhysicalDeviceMeshShaderPropertiesNV *props =
+ (VkPhysicalDeviceMeshShaderPropertiesNV *)ext;
+
+ /* Bounded by the maximum representable size in
+ * 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize. Same for Task.
+ */
+ const uint32_t max_slm_size = 64 * 1024;
+
+ /* Bounded by the maximum representable size in
+ * 3DSTATE_MESH_SHADER_BODY::LocalXMaximum. Same for Task.
+ */
+ const uint32_t max_workgroup_size = 1 << 10;
+
+ /* Bounded by the maximum representable count in
+ * 3DSTATE_MESH_SHADER_BODY::MaximumPrimitiveCount.
+ */
+ const uint32_t max_primitives = 1024;
+
+ /* TODO(mesh): Multiview. */
+ const uint32_t max_view_count = 1;
+
+ props->maxDrawMeshTasksCount = UINT32_MAX;
+
+ /* TODO(mesh): Implement workgroup Y and Z sizes larger than one by
+ * mapping them to/from the single value that HW provides us
+ * (currently used for X).
+ */
+
+ props->maxTaskWorkGroupInvocations = max_workgroup_size;
+ props->maxTaskWorkGroupSize[0] = max_workgroup_size;
+ props->maxTaskWorkGroupSize[1] = 1;
+ props->maxTaskWorkGroupSize[2] = 1;
+ props->maxTaskTotalMemorySize = max_slm_size;
+ props->maxTaskOutputCount = UINT16_MAX;
+
+ props->maxMeshWorkGroupInvocations = max_workgroup_size;
+ props->maxMeshWorkGroupSize[0] = max_workgroup_size;
+ props->maxMeshWorkGroupSize[1] = 1;
+ props->maxMeshWorkGroupSize[2] = 1;
+ props->maxMeshTotalMemorySize = max_slm_size / max_view_count;
+ props->maxMeshOutputPrimitives = max_primitives / max_view_count;
+ props->maxMeshMultiviewViewCount = max_view_count;
+
+ /* Depends on what indices can be represented with IndexFormat. For
+ * now we always use U32, so bound to the maximum unique vertices we
+ * need for the maximum primitives.
+ *
+ * TODO(mesh): Revisit this if we drop "U32" IndexFormat when adding
+ * support for others.
+ */
+ props->maxMeshOutputVertices = 3 * props->maxMeshOutputPrimitives;
+
+
+ props->meshOutputPerVertexGranularity = 32;
+ props->meshOutputPerPrimitiveGranularity = 32;
+
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
- properties->pciDomain = pdevice->pci_info.domain;
- properties->pciBus = pdevice->pci_info.bus;
- properties->pciDevice = pdevice->pci_info.device;
- properties->pciFunction = pdevice->pci_info.function;
+ properties->pciDomain = pdevice->info.pci_domain;
+ properties->pciBus = pdevice->info.pci_bus;
+ properties->pciDevice = pdevice->info.pci_dev;
+ properties->pciFunction = pdevice->info.pci_func;
break;
}
@@ -2357,55 +2569,6 @@ void anv_GetPhysicalDeviceProperties2(
break;
}
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_PROPERTIES_KHR: {
- VkPhysicalDeviceShaderIntegerDotProductPropertiesKHR *props =
- (VkPhysicalDeviceShaderIntegerDotProductPropertiesKHR *)ext;
-
- props->integerDotProduct8BitUnsignedAccelerated = false;
- props->integerDotProduct8BitSignedAccelerated = false;
- props->integerDotProduct8BitMixedSignednessAccelerated = false;
- props->integerDotProduct4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
- props->integerDotProduct4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
- props->integerDotProduct4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
- props->integerDotProduct16BitUnsignedAccelerated = false;
- props->integerDotProduct16BitSignedAccelerated = false;
- props->integerDotProduct16BitMixedSignednessAccelerated = false;
- props->integerDotProduct32BitUnsignedAccelerated = false;
- props->integerDotProduct32BitSignedAccelerated = false;
- props->integerDotProduct32BitMixedSignednessAccelerated = false;
- props->integerDotProduct64BitUnsignedAccelerated = false;
- props->integerDotProduct64BitSignedAccelerated = false;
- props->integerDotProduct64BitMixedSignednessAccelerated = false;
- props->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
- props->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
- props->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
- props->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
- props->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
- props->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
- props->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
- props->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
- props->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
- props->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
- props->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
- props->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
- props->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
- props->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
- props->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
-
- break;
- }
-
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
- VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
- (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
- STATIC_ASSERT(8 <= BRW_SUBGROUP_SIZE && BRW_SUBGROUP_SIZE <= 32);
- props->minSubgroupSize = 8;
- props->maxSubgroupSize = 32;
- props->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
- props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
- break;
- }
-
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
VkPhysicalDeviceSampleLocationsPropertiesEXT *props =
(VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
@@ -2425,36 +2588,6 @@ void anv_GetPhysicalDeviceProperties2(
break;
}
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
- VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *props =
- (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
-
- /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
- * Base Address:
- *
- * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
- * specifies the base address of the first element of the surface,
- * computed in software by adding the surface base address to the
- * byte offset of the element in the buffer. The base address must
- * be aligned to element size."
- *
- * The typed dataport messages require that things be texel aligned.
- * Otherwise, we may just load/store the wrong data or, in the worst
- * case, there may be hangs.
- */
- props->storageTexelBufferOffsetAlignmentBytes = 16;
- props->storageTexelBufferOffsetSingleTexelAlignment = true;
-
- /* The sampler, however, is much more forgiving and it can handle
- * arbitrary byte alignment for linear and buffer surfaces. It's
- * hard to find a good PRM citation for this but years of empirical
- * experience demonstrate that this is true.
- */
- props->uniformTexelBufferOffsetAlignmentBytes = 1;
- props->uniformTexelBufferOffsetSingleTexelAlignment = false;
- break;
- }
-
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
VkPhysicalDeviceTransformFeedbackPropertiesEXT *props =
(VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
@@ -2494,47 +2627,74 @@ void anv_GetPhysicalDeviceProperties2(
}
}
+static int
+vk_priority_to_gen(int priority)
+{
+ switch (priority) {
+ case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
+ return INTEL_CONTEXT_LOW_PRIORITY;
+ case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
+ return INTEL_CONTEXT_MEDIUM_PRIORITY;
+ case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
+ return INTEL_CONTEXT_HIGH_PRIORITY;
+ case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
+ return INTEL_CONTEXT_REALTIME_PRIORITY;
+ default:
+ unreachable("Invalid priority");
+ }
+}
+
static const VkQueueFamilyProperties
anv_queue_family_properties_template = {
.timestampValidBits = 36, /* XXX: Real value here */
.minImageTransferGranularity = { 1, 1, 1 },
};
-void anv_GetPhysicalDeviceQueueFamilyProperties(
- VkPhysicalDevice physicalDevice,
- uint32_t* pCount,
- VkQueueFamilyProperties* pQueueFamilyProperties)
-{
- ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
- VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pCount);
-
- for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
- struct anv_queue_family *queue_family = &pdevice->queue.families[i];
- vk_outarray_append(&out, p) {
- *p = anv_queue_family_properties_template;
- p->queueFlags = queue_family->queueFlags;
- p->queueCount = queue_family->queueCount;
- }
- }
-}
-
void anv_GetPhysicalDeviceQueueFamilyProperties2(
VkPhysicalDevice physicalDevice,
uint32_t* pQueueFamilyPropertyCount,
VkQueueFamilyProperties2* pQueueFamilyProperties)
{
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
- VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount);
+ VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
+ pQueueFamilyProperties, pQueueFamilyPropertyCount);
for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
struct anv_queue_family *queue_family = &pdevice->queue.families[i];
- vk_outarray_append(&out, p) {
+ vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
p->queueFamilyProperties = anv_queue_family_properties_template;
p->queueFamilyProperties.queueFlags = queue_family->queueFlags;
p->queueFamilyProperties.queueCount = queue_family->queueCount;
- vk_foreach_struct(s, p->pNext) {
- anv_debug_ignored_stype(s->sType);
+ vk_foreach_struct(ext, p->pNext) {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_EXT: {
+ VkQueueFamilyGlobalPriorityPropertiesEXT *properties =
+ (VkQueueFamilyGlobalPriorityPropertiesEXT *)ext;
+
+ /* Deliberately sorted low to high */
+ VkQueueGlobalPriorityEXT all_priorities[] = {
+ VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT,
+ VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT,
+ VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
+ VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT,
+ };
+
+ uint32_t count = 0;
+ for (unsigned i = 0; i < ARRAY_SIZE(all_priorities); i++) {
+ if (vk_priority_to_gen(all_priorities[i]) >
+ pdevice->max_context_priority)
+ break;
+
+ properties->priorities[count++] = all_priorities[i];
+ }
+ properties->priorityCount = count;
+ break;
+ }
+
+ default:
+ anv_debug_ignored_stype(ext->sType);
+ }
}
}
}
@@ -2719,7 +2879,7 @@ anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align,
static void
anv_device_init_border_colors(struct anv_device *device)
{
- if (device->info.is_haswell) {
+ if (device->info.platform == INTEL_PLATFORM_HSW) {
static const struct hsw_border_color border_colors[] = {
[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
[VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
@@ -2773,23 +2933,6 @@ anv_device_init_trivial_batch(struct anv_device *device)
return VK_SUCCESS;
}
-static int
-vk_priority_to_gen(int priority)
-{
- switch (priority) {
- case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
- return INTEL_CONTEXT_LOW_PRIORITY;
- case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
- return INTEL_CONTEXT_MEDIUM_PRIORITY;
- case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
- return INTEL_CONTEXT_HIGH_PRIORITY;
- case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
- return INTEL_CONTEXT_REALTIME_PRIORITY;
- default:
- unreachable("Invalid priority");
- }
-}
-
static bool
get_bo_from_pool(struct intel_batch_decode_bo *ret,
struct anv_block_pool *pool,
@@ -2889,6 +3032,8 @@ static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
.free = intel_aux_map_buffer_free,
};
+static VkResult anv_device_check_status(struct vk_device *vk_device);
+
VkResult anv_CreateDevice(
VkPhysicalDevice physicalDevice,
const VkDeviceCreateInfo* pCreateInfo,
@@ -2971,10 +3116,14 @@ VkResult anv_CreateDevice(
&physical_device->info,
stderr, decode_flags, NULL,
decode_get_bo, NULL, device);
+
+ device->decoder_ctx.dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
+ device->decoder_ctx.surface_base = SURFACE_STATE_POOL_MIN_ADDRESS;
+ device->decoder_ctx.instruction_base =
+ INSTRUCTION_STATE_POOL_MIN_ADDRESS;
}
device->physical = physical_device;
- device->_lost = false;
/* XXX(chadv): Can we dup() physicalDevice->fd here? */
device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
@@ -2983,6 +3132,10 @@ VkResult anv_CreateDevice(
goto fail_device;
}
+ device->vk.check_status = anv_device_check_status;
+ device->vk.create_sync_for_memory = anv_create_sync_for_memory;
+ vk_device_set_drm_fd(&device->vk, device->fd);
+
uint32_t num_queues = 0;
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
@@ -3005,9 +3158,9 @@ VkResult anv_CreateDevice(
engine_classes[engine_count++] = queue_family->engine_class;
}
device->context_id =
- anv_gem_create_context_engines(device,
- physical_device->engine_info,
- engine_count, engine_classes);
+ intel_gem_create_context_engines(device->fd,
+ physical_device->engine_info,
+ engine_count, engine_classes);
} else {
assert(num_queues == 1);
device->context_id = anv_gem_create_context(device);
@@ -3026,8 +3179,6 @@ VkResult anv_CreateDevice(
anv_gem_set_context_param(device->fd, device->context_id,
I915_CONTEXT_PARAM_RECOVERABLE, false);
- device->has_thread_submit = physical_device->has_thread_submit;
-
device->queues =
vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
@@ -3058,7 +3209,7 @@ VkResult anv_CreateDevice(
}
}
- if (physical_device->use_softpin) {
+ if (!anv_use_relocations(physical_device)) {
if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
goto fail_queues;
@@ -3087,7 +3238,7 @@ VkResult anv_CreateDevice(
* have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_EXT
* is returned.
*/
- if (physical_device->has_context_priority) {
+ if (physical_device->max_context_priority >= INTEL_CONTEXT_MEDIUM_PRIORITY) {
int err = anv_gem_set_context_param(device->fd, device->context_id,
I915_CONTEXT_PARAM_PRIORITY,
vk_priority_to_gen(priority));
@@ -3178,17 +3329,26 @@ VkResult anv_CreateDevice(
if (result != VK_SUCCESS)
goto fail_instruction_state_pool;
- if (physical_device->use_softpin) {
+ if (device->info.verx10 >= 125) {
+ /* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to give the binding
+ * table its own base address separately from surface state base.
+ */
+ result = anv_state_pool_init(&device->binding_table_pool, device,
+ "binding table pool",
+ BINDING_TABLE_POOL_MIN_ADDRESS, 0,
+ BINDING_TABLE_POOL_BLOCK_SIZE);
+ } else if (!anv_use_relocations(physical_device)) {
int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
(int64_t)SURFACE_STATE_POOL_MIN_ADDRESS;
assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
result = anv_state_pool_init(&device->binding_table_pool, device,
"binding table pool",
SURFACE_STATE_POOL_MIN_ADDRESS,
- bt_pool_offset, 4096);
- if (result != VK_SUCCESS)
- goto fail_surface_state_pool;
+ bt_pool_offset,
+ BINDING_TABLE_POOL_BLOCK_SIZE);
}
+ if (result != VK_SUCCESS)
+ goto fail_surface_state_pool;
if (device->info.has_aux_map) {
device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
@@ -3219,9 +3379,44 @@ VkResult anv_CreateDevice(
device->workaround_bo->size,
INTEL_DEBUG_BLOCK_TYPE_FRAME);
+ if (device->vk.enabled_extensions.KHR_ray_query) {
+ uint32_t ray_queries_size =
+ align_u32(brw_rt_ray_queries_hw_stacks_size(&device->info), 4096);
+
+ result = anv_device_alloc_bo(device, "ray queries",
+ ray_queries_size,
+ ANV_BO_ALLOC_LOCAL_MEM,
+ 0 /* explicit_address */,
+ &device->ray_query_bo);
+ if (result != VK_SUCCESS)
+ goto fail_workaround_bo;
+ }
+
result = anv_device_init_trivial_batch(device);
if (result != VK_SUCCESS)
- goto fail_workaround_bo;
+ goto fail_ray_query_bo;
+
+ if (device->info.ver >= 12 &&
+ device->vk.enabled_extensions.KHR_fragment_shading_rate) {
+ uint32_t n_cps_states = 3 * 3; /* All combinaisons of X by Y CP sizes (1, 2, 4) */
+
+ if (device->info.has_coarse_pixel_primitive_and_cb)
+ n_cps_states *= 5 * 5; /* 5 combiners by 2 operators */
+
+ n_cps_states += 1; /* Disable CPS */
+
+ /* Each of the combinaison must be replicated on all viewports */
+ n_cps_states *= MAX_VIEWPORTS;
+
+ device->cps_states =
+ anv_state_pool_alloc(&device->dynamic_state_pool,
+ n_cps_states * CPS_STATE_length(&device->info) * 4,
+ 32);
+ if (device->cps_states.map == NULL)
+ goto fail_trivial_batch;
+
+ anv_genX(&device->info, init_cps_device_state)(device);
+ }
/* Allocate a null surface state at surface state offset 0. This makes
* NULL descriptor handling trivial because we can just memset structures
@@ -3257,6 +3452,8 @@ VkResult anv_CreateDevice(
anv_device_perf_init(device);
+ anv_device_utrace_init(device);
+
*pDevice = anv_device_to_handle(device);
return VK_SUCCESS;
@@ -3265,7 +3462,11 @@ VkResult anv_CreateDevice(
anv_pipeline_cache_finish(&device->default_pipeline_cache);
fail_trivial_batch_bo_and_scratch_pool:
anv_scratch_pool_finish(device, &device->scratch_pool);
+ fail_trivial_batch:
anv_device_release_bo(device, device->trivial_batch_bo);
+ fail_ray_query_bo:
+ if (device->ray_query_bo)
+ anv_device_release_bo(device, device->ray_query_bo);
fail_workaround_bo:
anv_device_release_bo(device, device->workaround_bo);
fail_surface_aux_map_pool:
@@ -3274,7 +3475,7 @@ VkResult anv_CreateDevice(
device->aux_map_ctx = NULL;
}
fail_binding_table_pool:
- if (physical_device->use_softpin)
+ if (!anv_use_relocations(physical_device))
anv_state_pool_finish(&device->binding_table_pool);
fail_surface_state_pool:
anv_state_pool_finish(&device->surface_state_pool);
@@ -3294,7 +3495,7 @@ VkResult anv_CreateDevice(
fail_mutex:
pthread_mutex_destroy(&device->mutex);
fail_vmas:
- if (physical_device->use_softpin) {
+ if (!anv_use_relocations(physical_device)) {
util_vma_heap_finish(&device->vma_hi);
util_vma_heap_finish(&device->vma_cva);
util_vma_heap_finish(&device->vma_lo);
@@ -3324,6 +3525,8 @@ void anv_DestroyDevice(
if (!device)
return;
+ anv_device_utrace_finish(device);
+
anv_device_finish_blorp(device);
anv_device_finish_rt_shaders(device);
@@ -3338,6 +3541,7 @@ void anv_DestroyDevice(
anv_state_reserved_pool_finish(&device->custom_border_colors);
anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
+ anv_state_pool_free(&device->dynamic_state_pool, device->cps_states);
#endif
for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
@@ -3347,6 +3551,13 @@ void anv_DestroyDevice(
anv_scratch_pool_finish(device, &device->scratch_pool);
+ if (device->vk.enabled_extensions.KHR_ray_query) {
+ for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_shadow_bos); i++) {
+ if (device->ray_query_shadow_bos[i] != NULL)
+ anv_device_release_bo(device, device->ray_query_shadow_bos[i]);
+ }
+ anv_device_release_bo(device, device->ray_query_bo);
+ }
anv_device_release_bo(device, device->workaround_bo);
anv_device_release_bo(device, device->trivial_batch_bo);
@@ -3355,7 +3566,7 @@ void anv_DestroyDevice(
device->aux_map_ctx = NULL;
}
- if (device->physical->use_softpin)
+ if (!anv_use_relocations(device->physical))
anv_state_pool_finish(&device->binding_table_pool);
anv_state_pool_finish(&device->surface_state_pool);
anv_state_pool_finish(&device->instruction_state_pool);
@@ -3366,7 +3577,7 @@ void anv_DestroyDevice(
anv_bo_cache_finish(&device->bo_cache);
- if (device->physical->use_softpin) {
+ if (!anv_use_relocations(device->physical)) {
util_vma_heap_finish(&device->vma_hi);
util_vma_heap_finish(&device->vma_cva);
util_vma_heap_finish(&device->vma_lo);
@@ -3403,126 +3614,29 @@ VkResult anv_EnumerateInstanceLayerProperties(
return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}
-void
-_anv_device_report_lost(struct anv_device *device)
-{
- assert(p_atomic_read(&device->_lost) > 0);
-
- device->lost_reported = true;
-
- for (uint32_t i = 0; i < device->queue_count; i++) {
- struct anv_queue *queue = &device->queues[i];
- if (queue->lost) {
- __vk_errorf(queue, VK_ERROR_DEVICE_LOST,
- queue->error_file, queue->error_line,
- "%s", queue->error_msg);
- }
- }
-}
-
-VkResult
-_anv_device_set_lost(struct anv_device *device,
- const char *file, int line,
- const char *msg, ...)
-{
- VkResult err;
- va_list ap;
-
- if (p_atomic_read(&device->_lost) > 0)
- return VK_ERROR_DEVICE_LOST;
-
- p_atomic_inc(&device->_lost);
- device->lost_reported = true;
-
- va_start(ap, msg);
- err = __vk_errorv(device, VK_ERROR_DEVICE_LOST, file, line, msg, ap);
- va_end(ap);
-
- if (env_var_as_boolean("ANV_ABORT_ON_DEVICE_LOSS", false))
- abort();
-
- return err;
-}
-
-VkResult
-_anv_queue_set_lost(struct anv_queue *queue,
- const char *file, int line,
- const char *msg, ...)
-{
- va_list ap;
-
- if (queue->lost)
- return VK_ERROR_DEVICE_LOST;
-
- queue->lost = true;
-
- queue->error_file = file;
- queue->error_line = line;
- va_start(ap, msg);
- vsnprintf(queue->error_msg, sizeof(queue->error_msg),
- msg, ap);
- va_end(ap);
-
- p_atomic_inc(&queue->device->_lost);
-
- if (env_var_as_boolean("ANV_ABORT_ON_DEVICE_LOSS", false))
- abort();
-
- return VK_ERROR_DEVICE_LOST;
-}
-
-VkResult
-anv_device_query_status(struct anv_device *device)
+static VkResult
+anv_device_check_status(struct vk_device *vk_device)
{
- /* This isn't likely as most of the callers of this function already check
- * for it. However, it doesn't hurt to check and it potentially lets us
- * avoid an ioctl.
- */
- if (anv_device_is_lost(device))
- return VK_ERROR_DEVICE_LOST;
+ struct anv_device *device = container_of(vk_device, struct anv_device, vk);
uint32_t active, pending;
int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id,
&active, &pending);
if (ret == -1) {
/* We don't know the real error. */
- return anv_device_set_lost(device, "get_reset_stats failed: %m");
+ return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m");
}
if (active) {
- return anv_device_set_lost(device, "GPU hung on one of our command buffers");
+ return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers");
} else if (pending) {
- return anv_device_set_lost(device, "GPU hung with commands in-flight");
+ return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight");
}
return VK_SUCCESS;
}
VkResult
-anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo)
-{
- /* Note: This only returns whether or not the BO is in use by an i915 GPU.
- * Other usages of the BO (such as on different hardware) will not be
- * flagged as "busy" by this ioctl. Use with care.
- */
- int ret = anv_gem_busy(device, bo->gem_handle);
- if (ret == 1) {
- return VK_NOT_READY;
- } else if (ret == -1) {
- /* We don't know the real error. */
- return anv_device_set_lost(device, "gem wait failed: %m");
- }
-
- /* Query for device status after the busy call. If the BO we're checking
- * got caught in a GPU hang we don't want to return VK_SUCCESS to the
- * client because it clearly doesn't have valid data. Yes, this most
- * likely means an ioctl, but we just did an ioctl to query the busy status
- * so it's no great loss.
- */
- return anv_device_query_status(device);
-}
-
-VkResult
anv_device_wait(struct anv_device *device, struct anv_bo *bo,
int64_t timeout)
{
@@ -3531,15 +3645,10 @@ anv_device_wait(struct anv_device *device, struct anv_bo *bo,
return VK_TIMEOUT;
} else if (ret == -1) {
/* We don't know the real error. */
- return anv_device_set_lost(device, "gem wait failed: %m");
+ return vk_device_set_lost(&device->vk, "gem wait failed: %m");
+ } else {
+ return VK_SUCCESS;
}
-
- /* Query for device status after the wait. If the BO we're waiting on got
- * caught in a GPU hang we don't want to return VK_SUCCESS to the client
- * because it clearly doesn't have valid data. Yes, this most likely means
- * an ioctl, but we just did an ioctl to wait so it's no great loss.
- */
- return anv_device_query_status(device);
}
uint64_t
@@ -3643,6 +3752,7 @@ VkResult anv_AllocateMemory(
mem->type = mem_type;
mem->map = NULL;
mem->map_size = 0;
+ mem->map_delta = 0;
mem->ahw = NULL;
mem->host_ptr = NULL;
@@ -3692,13 +3802,17 @@ VkResult anv_AllocateMemory(
}
default:
- anv_debug_ignored_stype(ext->sType);
+ if (ext->sType != VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA)
+ /* this isn't a real enum value,
+ * so use conditional to avoid compiler warn
+ */
+ anv_debug_ignored_stype(ext->sType);
break;
}
}
/* By default, we want all VkDeviceMemory objects to support CCS */
- if (device->physical->has_implicit_ccs)
+ if (device->physical->has_implicit_ccs && device->info.has_aux_map)
alloc_flags |= ANV_BO_ALLOC_IMPLICIT_CCS;
if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR)
@@ -3709,13 +3823,6 @@ VkResult anv_AllocateMemory(
(host_ptr_info && host_ptr_info->handleType)) {
/* Anything imported or exported is EXTERNAL */
alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
-
- /* We can't have implicit CCS on external memory with an AUX-table.
- * Doing so would require us to sync the aux tables across processes
- * which is impractical.
- */
- if (device->info.has_aux_map)
- alloc_flags &= ~ANV_BO_ALLOC_IMPLICIT_CCS;
}
/* Check if we need to support Android HW buffer export. If so,
@@ -3829,15 +3936,12 @@ VkResult anv_AllocateMemory(
* the BO. In this case, we have a dedicated allocation.
*/
if (image->vk.wsi_legacy_scanout) {
- const uint32_t i915_tiling =
- isl_tiling_to_i915_tiling(image->planes[0].primary_surface.isl.tiling);
- int ret = anv_gem_set_tiling(device, mem->bo->gem_handle,
- image->planes[0].primary_surface.isl.row_pitch_B,
- i915_tiling);
- if (ret) {
+ const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
+ result = anv_device_set_bo_tiling(device, mem->bo,
+ surf->row_pitch_B,
+ surf->tiling);
+ if (result != VK_SUCCESS) {
anv_device_release_bo(device, mem->bo);
- result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
- "failed to set BO tiling: %m");
goto fail;
}
}
@@ -3999,11 +4103,20 @@ VkResult anv_MapMemory(
assert(size > 0);
assert(offset + size <= mem->bo->size);
- /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
- * takes a VkDeviceMemory pointer, it seems like only one map of the memory
- * at a time is valid. We could just mmap up front and return an offset
- * pointer here, but that may exhaust virtual memory on 32 bit
- * userspace. */
+ if (size != (size_t)size) {
+ return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
+ "requested size 0x%"PRIx64" does not fit in %u bits",
+ size, (unsigned)(sizeof(size_t) * 8));
+ }
+
+ /* From the Vulkan 1.2.194 spec:
+ *
+ * "memory must not be currently host mapped"
+ */
+ if (mem->map != NULL) {
+ return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
+ "Memory object already mapped.");
+ }
uint32_t gem_flags = 0;
@@ -4023,15 +4136,15 @@ VkResult anv_MapMemory(
/* Let's map whole pages */
map_size = align_u64(map_size, 4096);
- void *map = anv_gem_mmap(device, mem->bo->gem_handle,
- map_offset, map_size, gem_flags);
- if (map == MAP_FAILED)
- return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
+ void *map;
+ VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
+ map_size, gem_flags, &map);
+ if (result != VK_SUCCESS)
+ return result;
mem->map = map;
mem->map_size = map_size;
mem->map_delta = (offset - map_offset);
-
*ppData = mem->map + mem->map_delta;
return VK_SUCCESS;
@@ -4047,32 +4160,13 @@ void anv_UnmapMemory(
if (mem == NULL || mem->host_ptr)
return;
- anv_gem_munmap(device, mem->map, mem->map_size);
+ anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size);
mem->map = NULL;
mem->map_size = 0;
mem->map_delta = 0;
}
-static void
-clflush_mapped_ranges(struct anv_device *device,
- uint32_t count,
- const VkMappedMemoryRange *ranges)
-{
- for (uint32_t i = 0; i < count; i++) {
- ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory);
- uint64_t map_offset = ranges[i].offset + mem->map_delta;
- if (map_offset >= mem->map_size)
- continue;
-
- if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
- continue;
-
- intel_clflush_range(mem->map + map_offset,
- MIN2(ranges[i].size, mem->map_size - map_offset));
- }
-}
-
VkResult anv_FlushMappedMemoryRanges(
VkDevice _device,
uint32_t memoryRangeCount,
@@ -4086,7 +4180,19 @@ VkResult anv_FlushMappedMemoryRanges(
/* Make sure the writes we're flushing have landed. */
__builtin_ia32_mfence();
- clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
+ for (uint32_t i = 0; i < memoryRangeCount; i++) {
+ ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
+ uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
+ if (map_offset >= mem->map_size)
+ continue;
+
+ if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ continue;
+
+ intel_clflush_range(mem->map + map_offset,
+ MIN2(pMemoryRanges[i].size,
+ mem->map_size - map_offset));
+ }
return VK_SUCCESS;
}
@@ -4101,7 +4207,19 @@ VkResult anv_InvalidateMappedMemoryRanges(
if (!device->physical->memory.need_clflush)
return VK_SUCCESS;
- clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
+ for (uint32_t i = 0; i < memoryRangeCount; i++) {
+ ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
+ uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
+ if (map_offset >= mem->map_size)
+ continue;
+
+ if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ continue;
+
+ intel_invalidate_range(mem->map + map_offset,
+ MIN2(pMemoryRanges[i].size,
+ mem->map_size - map_offset));
+ }
/* Make sure no reads get moved up above the invalidate. */
__builtin_ia32_mfence();
@@ -4155,7 +4273,7 @@ VkResult anv_QueueBindSparse(
VkFence fence)
{
ANV_FROM_HANDLE(anv_queue, queue, _queue);
- if (anv_device_is_lost(queue->device))
+ if (vk_device_is_lost(&queue->device->vk))
return VK_ERROR_DEVICE_LOST;
return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
@@ -4211,7 +4329,7 @@ VkResult anv_GetEventStatus(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_event, event, _event);
- if (anv_device_is_lost(device))
+ if (vk_device_is_lost(&device->vk))
return VK_ERROR_DEVICE_LOST;
return *(uint64_t *)event->state.map;
@@ -4375,7 +4493,7 @@ VkDeviceAddress anv_GetBufferDeviceAddress(
ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
assert(!anv_address_is_null(buffer->address));
- assert(buffer->address.bo->flags & EXEC_OBJECT_PINNED);
+ assert(anv_bo_is_pinned(buffer->address.bo));
return anv_address_physical(buffer->address);
}
@@ -4393,7 +4511,7 @@ uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
{
ANV_FROM_HANDLE(anv_device_memory, memory, pInfo->memory);
- assert(memory->bo->flags & EXEC_OBJECT_PINNED);
+ assert(anv_bo_is_pinned(memory->bo));
assert(memory->bo->has_client_visible_address);
return intel_48b_address(memory->bo->offset);
@@ -4402,6 +4520,7 @@ uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
void
anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
enum isl_format format,
+ struct isl_swizzle swizzle,
isl_surf_usage_flags_t usage,
struct anv_address address,
uint32_t range, uint32_t stride)
@@ -4412,7 +4531,7 @@ anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
address.bo && address.bo->is_external),
.size_B = range,
.format = format,
- .swizzle = ISL_SWIZZLE_IDENTITY,
+ .swizzle = swizzle,
.stride_B = stride);
}
@@ -4440,63 +4559,6 @@ void anv_DestroySampler(
vk_object_free(&device->vk, pAllocator, sampler);
}
-VkResult anv_CreateFramebuffer(
- VkDevice _device,
- const VkFramebufferCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkFramebuffer* pFramebuffer)
-{
- ANV_FROM_HANDLE(anv_device, device, _device);
- struct anv_framebuffer *framebuffer;
-
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
-
- size_t size = sizeof(*framebuffer);
-
- /* VK_KHR_imageless_framebuffer extension says:
- *
- * If flags includes VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT_KHR,
- * parameter pAttachments is ignored.
- */
- if (!(pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT_KHR))
- size += sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;
-
- framebuffer = vk_object_alloc(&device->vk, pAllocator, size,
- VK_OBJECT_TYPE_FRAMEBUFFER);
- if (framebuffer == NULL)
- return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- framebuffer->width = pCreateInfo->width;
- framebuffer->height = pCreateInfo->height;
- framebuffer->layers = pCreateInfo->layers;
-
- if (!(pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT_KHR)) {
- for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
- ANV_FROM_HANDLE(anv_image_view, iview, pCreateInfo->pAttachments[i]);
- framebuffer->attachments[i] = iview;
- }
- framebuffer->attachment_count = pCreateInfo->attachmentCount;
- }
-
- *pFramebuffer = anv_framebuffer_to_handle(framebuffer);
-
- return VK_SUCCESS;
-}
-
-void anv_DestroyFramebuffer(
- VkDevice _device,
- VkFramebuffer _fb,
- const VkAllocationCallbacks* pAllocator)
-{
- ANV_FROM_HANDLE(anv_device, device, _device);
- ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);
-
- if (!fb)
- return;
-
- vk_object_free(&device->vk, pAllocator, fb);
-}
-
static const VkTimeDomainEXT anv_time_domains[] = {
VK_TIME_DOMAIN_DEVICE_EXT,
VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
@@ -4511,10 +4573,10 @@ VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
VkTimeDomainEXT *pTimeDomains)
{
int d;
- VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
+ VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);
for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
- vk_outarray_append(&out, i) {
+ vk_outarray_append_typed(VkTimeDomainEXT, &out, i) {
*i = anv_time_domains[d];
}
}
@@ -4566,8 +4628,8 @@ VkResult anv_GetCalibratedTimestampsEXT(
&pTimestamps[d]);
if (ret != 0) {
- return anv_device_set_lost(device, "Failed to read the TIMESTAMP "
- "register: %m");
+ return vk_device_set_lost(&device->vk, "Failed to read the "
+ "TIMESTAMP register: %m");
}
uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
max_clock_period = MAX2(max_clock_period, device_period);
@@ -4702,8 +4764,14 @@ vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
*
* - Loader interface v4 differs from v3 in:
* - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
+ *
+ * - Loader interface v5 differs from v4 in:
+ * - The ICD must support Vulkan API version 1.1 and must not return
+ * VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a
+ * Vulkan Loader with interface v4 or smaller is being used and the
+ * application provides an API version that is greater than 1.0.
*/
- *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
+ *pSupportedVersion = MIN2(*pSupportedVersion, 5u);
return VK_SUCCESS;
}
@@ -4713,30 +4781,62 @@ VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
VkPhysicalDeviceFragmentShadingRateKHR* pFragmentShadingRates)
{
ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
- VK_OUTARRAY_MAKE(out, pFragmentShadingRates, pFragmentShadingRateCount);
-
-#define append_rate(_samples, _width, _height) \
- do { \
- vk_outarray_append(&out, __r) { \
- __r->sampleCounts = _samples; \
- __r->fragmentSize = (VkExtent2D) { \
- .width = _width, \
- .height = _height, \
- }; \
- } \
+ VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
+ pFragmentShadingRates, pFragmentShadingRateCount);
+
+#define append_rate(_samples, _width, _height) \
+ do { \
+ vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, __r) { \
+ __r->sampleCounts = _samples; \
+ __r->fragmentSize = (VkExtent2D) { \
+ .width = _width, \
+ .height = _height, \
+ }; \
+ } \
} while (0)
VkSampleCountFlags sample_counts =
isl_device_get_sample_counts(&physical_device->isl_dev);
+ /* BSpec 47003: There are a number of restrictions on the sample count
+ * based off the coarse pixel size.
+ */
+ static const VkSampleCountFlags cp_size_sample_limits[] = {
+ [1] = ISL_SAMPLE_COUNT_16_BIT | ISL_SAMPLE_COUNT_8_BIT |
+ ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
+ [2] = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
+ [4] = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
+ [8] = ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
+ [16] = ISL_SAMPLE_COUNT_1_BIT,
+ };
+
for (uint32_t x = 4; x >= 1; x /= 2) {
for (uint32_t y = 4; y >= 1; y /= 2) {
- /* For size {1, 1}, the sample count must be ~0 */
- if (x == 1 && y == 1)
- append_rate(~0, x, y);
- else
- append_rate(sample_counts, x, y);
- }
+ if (physical_device->info.has_coarse_pixel_primitive_and_cb) {
+ /* BSpec 47003:
+ * "CPsize 1x4 and 4x1 are not supported"
+ */
+ if ((x == 1 && y == 4) || (x == 4 && y == 1))
+ continue;
+
+ /* For size {1, 1}, the sample count must be ~0
+ *
+ * 4x2 is also a specially case.
+ */
+ if (x == 1 && y == 1)
+ append_rate(~0, x, y);
+ else if (x == 4 && y == 2)
+ append_rate(ISL_SAMPLE_COUNT_1_BIT, x, y);
+ else
+ append_rate(cp_size_sample_limits[x * y], x, y);
+ } else {
+ /* For size {1, 1}, the sample count must be ~0 */
+ if (x == 1 && y == 1)
+ append_rate(~0, x, y);
+ else
+ append_rate(sample_counts, x, y);
+ }
+ }
}
#undef append_rate