author    Jonathan Gray <jsg@cvs.openbsd.org>    2024-04-02 09:30:07 +0000
committer Jonathan Gray <jsg@cvs.openbsd.org>    2024-04-02 09:30:07 +0000
commit    f54e142455cb3c9d1662dae7e096a32a47e5409b (patch)
tree      440ecd46269f0eac25e349e1ed58f246490c5e26 /lib/mesa/src/broadcom/vulkan
parent    36d8503c27530f68d655d3ef77a6eaa4dfd8ad65 (diff)
Import Mesa 23.3.6
Diffstat (limited to 'lib/mesa/src/broadcom/vulkan')
-rw-r--r-- lib/mesa/src/broadcom/vulkan/meson.build | 15
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_android.c | 334
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_bo.c | 4
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_cl.h | 9
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c | 455
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_device.c | 989
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_event.c | 22
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_formats.c | 31
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_image.c | 349
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_limits.h | 4
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_meta_clear.c | 23
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_meta_copy.c | 880
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_pass.c | 21
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_pipeline.c | 315
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_pipeline_cache.c | 2
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_private.h | 183
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_query.c | 162
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_queue.c | 66
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_uniforms.c | 22
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dv_wsi.c | 79
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c | 503
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dvx_device.c | 53
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dvx_formats.c | 1
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dvx_image.c | 66
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dvx_meta_common.c | 108
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dvx_pipeline.c | 143
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dvx_private.h | 53
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dvx_query.c | 67
-rw-r--r-- lib/mesa/src/broadcom/vulkan/v3dvx_queue.c | 18
29 files changed, 3111 insertions, 1866 deletions
diff --git a/lib/mesa/src/broadcom/vulkan/meson.build b/lib/mesa/src/broadcom/vulkan/meson.build
index c3595cf73..3605e0965 100644
--- a/lib/mesa/src/broadcom/vulkan/meson.build
+++ b/lib/mesa/src/broadcom/vulkan/meson.build
@@ -25,7 +25,9 @@ v3dv_entrypoints = custom_target(
command : [
prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
'--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'v3dv',
+ '--beta', with_vulkan_beta.to_string(),
'--device-prefix', 'ver42',
+ '--device-prefix', 'ver71',
],
depend_files : vk_entrypoints_gen_depend_files,
)
@@ -63,13 +65,11 @@ files_per_version = files(
'v3dvx_pipeline.c',
'v3dvx_meta_common.c',
'v3dvx_pipeline.c',
+ 'v3dvx_query.c',
'v3dvx_queue.c',
)
-# The vulkan driver only supports version >= 42, which is the version present in
-# Rpi4. We need to explicitly set it as we are reusing pieces from the GL v3d
-# driver.
-v3d_versions = ['42']
+v3d_versions = ['42', '71']
v3dv_flags = []
@@ -100,7 +100,7 @@ if with_platform_wayland
endif
if with_platform_android
- v3dv_deps += dep_android
+ v3dv_deps += [dep_android, idep_u_gralloc]
v3dv_flags += '-DVK_USE_PLATFORM_ANDROID_KHR'
libv3dv_files += files('v3dv_android.c')
endif
@@ -112,7 +112,7 @@ foreach ver : v3d_versions
[files_per_version, v3d_xml_pack, v3dv_entrypoints[0]],
include_directories : [
inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom,
- inc_compiler, inc_util,
+ inc_util,
],
c_args : [v3dv_flags, '-DV3D_VERSION=' + ver],
gnu_symbol_visibility : 'hidden',
@@ -124,7 +124,7 @@ libvulkan_broadcom = shared_library(
'vulkan_broadcom',
[libv3dv_files, v3dv_entrypoints, sha1_h],
include_directories : [
- inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_broadcom, inc_compiler, inc_util,
+ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_broadcom, inc_util,
],
link_with : [
libbroadcom_cle,
@@ -165,6 +165,7 @@ broadcom_icd = custom_target(
],
build_by_default : true,
install_dir : with_vulkan_icd_dir,
+ install_tag : 'runtime',
install : true,
)
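
Each file in files_per_version is now compiled once per entry in v3d_versions, each with its own -DV3D_VERSION, producing v3d42_*- and v3d71_*-prefixed symbols. At run time the driver picks between them with its v3dv_X() dispatch macro (used in the v3dv_cmd_buffer.c hunks below). A minimal sketch of that pattern, assuming the driver's usual per-version prefixes; the authoritative definition lives in v3dv_private.h:

/* Pick the per-version implementation of 'thing' based on the
 * device's hardware generation (GCC statement expression).
 */
#define v3dv_X(device, thing) ({                               \
   __typeof(&v3d42_##thing) v3d_X_thing;                       \
   switch ((device)->devinfo.ver) {                            \
   case 42: v3d_X_thing = &v3d42_##thing; break;               \
   case 71: v3d_X_thing = &v3d71_##thing; break;               \
   default: unreachable("Unsupported hardware generation");    \
   }                                                           \
   v3d_X_thing;                                                \
})

/* Usage, as in the command buffer code below: */
v3dv_X(cmd_buffer->device, viewport_compute_xform)(viewport, scale, translate);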
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_android.c b/lib/mesa/src/broadcom/vulkan/v3dv_android.c
index d217aaf11..6c49e5d71 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_android.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_android.c
@@ -35,6 +35,9 @@
#include <vulkan/vk_android_native_buffer.h>
#include <vulkan/vk_icd.h>
+#include "vk_android.h"
+#include "vulkan/util/vk_enum_defines.h"
+
#include "util/libsync.h"
#include "util/log.h"
#include "util/os_file.h"
@@ -112,117 +115,55 @@ v3dv_hal_close(struct hw_device_t *dev)
return -1;
}
-static int
-get_format_bpp(int native)
-{
- int bpp;
-
- switch (native) {
- case HAL_PIXEL_FORMAT_RGBA_FP16:
- bpp = 8;
- break;
- case HAL_PIXEL_FORMAT_RGBA_8888:
- case HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED:
- case HAL_PIXEL_FORMAT_RGBX_8888:
- case HAL_PIXEL_FORMAT_BGRA_8888:
- case HAL_PIXEL_FORMAT_RGBA_1010102:
- bpp = 4;
- break;
- case HAL_PIXEL_FORMAT_RGB_565:
- bpp = 2;
- break;
- default:
- bpp = 0;
- break;
- }
-
- return bpp;
-}
-
-/* get buffer info from VkNativeBufferANDROID */
-static VkResult
-v3dv_gralloc_info_other(struct v3dv_device *device,
- const VkNativeBufferANDROID *native_buffer,
- int *out_stride,
- uint64_t *out_modifier)
-{
- *out_stride = native_buffer->stride /*in pixels*/ *
- get_format_bpp(native_buffer->format);
- *out_modifier = DRM_FORMAT_MOD_LINEAR;
- return VK_SUCCESS;
-}
-
-static const char cros_gralloc_module_name[] = "CrOS Gralloc";
-
-#define CROS_GRALLOC_DRM_GET_BUFFER_INFO 4
-
-struct cros_gralloc0_buffer_info
-{
- uint32_t drm_fourcc;
- int num_fds;
- int fds[4];
- uint64_t modifier;
- int offset[4];
- int stride[4];
-};
-
-static VkResult
-v3dv_gralloc_info_cros(struct v3dv_device *device,
- const VkNativeBufferANDROID *native_buffer,
- int *out_stride,
- uint64_t *out_modifier)
+VkResult
+v3dv_gralloc_to_drm_explicit_layout(struct u_gralloc *gralloc,
+ struct u_gralloc_buffer_handle *in_hnd,
+ VkImageDrmFormatModifierExplicitCreateInfoEXT *out,
+ VkSubresourceLayout *out_layouts,
+ int max_planes)
{
- const gralloc_module_t *gralloc = device->gralloc;
- struct cros_gralloc0_buffer_info info;
- int ret;
+ struct u_gralloc_buffer_basic_info info;
- ret = gralloc->perform(gralloc, CROS_GRALLOC_DRM_GET_BUFFER_INFO,
- native_buffer->handle, &info);
- if (ret)
+ if (u_gralloc_get_buffer_basic_info(gralloc, in_hnd, &info) != 0)
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
- *out_stride = info.stride[0];
- *out_modifier = info.modifier;
+ if (info.num_planes > max_planes)
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
- return VK_SUCCESS;
-}
+ bool is_disjoint = false;
+ for (int i = 1; i < info.num_planes; i++) {
+ if (info.offsets[i] == 0) {
+ is_disjoint = true;
+ break;
+ }
+ }
-VkResult
-v3dv_gralloc_info(struct v3dv_device *device,
- const VkNativeBufferANDROID *native_buffer,
- int *out_dmabuf,
- int *out_stride,
- int *out_size,
- uint64_t *out_modifier)
-{
- if (device->gralloc_type == V3DV_GRALLOC_UNKNOWN) {
- /* get gralloc module for gralloc buffer info query */
- int err = hw_get_module(GRALLOC_HARDWARE_MODULE_ID,
- (const hw_module_t **) &device->gralloc);
+ if (is_disjoint) {
+ /* We don't support disjoint planes yet */
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ }
- device->gralloc_type = V3DV_GRALLOC_OTHER;
+ memset(out_layouts, 0, sizeof(*out_layouts) * info.num_planes);
+ memset(out, 0, sizeof(*out));
- if (err == 0) {
- const gralloc_module_t *gralloc = device->gralloc;
- mesa_logi("opened gralloc module name: %s", gralloc->common.name);
+ out->sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT;
+ out->pPlaneLayouts = out_layouts;
- if (strcmp(gralloc->common.name, cros_gralloc_module_name) == 0 &&
- gralloc->perform) {
- device->gralloc_type = V3DV_GRALLOC_CROS;
- }
- }
+ out->drmFormatModifier = info.modifier;
+ out->drmFormatModifierPlaneCount = info.num_planes;
+ for (int i = 0; i < info.num_planes; i++) {
+ out_layouts[i].offset = info.offsets[i];
+ out_layouts[i].rowPitch = info.strides[i];
}
- *out_dmabuf = native_buffer->handle->data[0];
- *out_size = lseek(*out_dmabuf, 0, SEEK_END);
-
- if (device->gralloc_type == V3DV_GRALLOC_CROS) {
- return v3dv_gralloc_info_cros(device, native_buffer, out_stride,
- out_modifier);
- } else {
- return v3dv_gralloc_info_other(device, native_buffer, out_stride,
- out_modifier);
+ if (info.drm_fourcc == DRM_FORMAT_YVU420) {
+ /* Swap the U and V planes to match VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM */
+ VkSubresourceLayout tmp = out_layouts[1];
+ out_layouts[1] = out_layouts[2];
+ out_layouts[2] = tmp;
}
+
+ return VK_SUCCESS;
}
VkResult
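
The struct this helper fills is meant to be chained into VkImageCreateInfo when the gralloc buffer is imported as a DRM-format-modifier image. A hedged caller-side sketch; the function and parameter names here are illustrative, not taken from the driver:

static VkResult
create_image_from_gralloc(VkDevice dev, struct u_gralloc *gralloc,
                          struct u_gralloc_buffer_handle *hnd,
                          VkFormat format, VkExtent3D extent, VkImage *img)
{
   VkSubresourceLayout layouts[4];
   VkImageDrmFormatModifierExplicitCreateInfoEXT mod_info;
   VkResult result =
      v3dv_gralloc_to_drm_explicit_layout(gralloc, hnd, &mod_info,
                                          layouts, 4);
   if (result != VK_SUCCESS)
      return result;

   const VkImageCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
      .pNext = &mod_info, /* explicit plane layouts + modifier */
      .imageType = VK_IMAGE_TYPE_2D,
      .format = format,
      .extent = extent,
      .mipLevels = 1,
      .arrayLayers = 1,
      .samples = VK_SAMPLE_COUNT_1_BIT,
      .tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
      .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
   };
   return vkCreateImage(dev, &info, NULL, img);
}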
@@ -231,11 +172,8 @@ v3dv_import_native_buffer_fd(VkDevice device_h,
const VkAllocationCallbacks *alloc,
VkImage image_h)
{
- struct v3dv_image *image = NULL;
VkResult result;
- image = v3dv_image_from_handle(image_h);
-
VkDeviceMemory memory_h;
const VkMemoryDedicatedAllocateInfo ded_alloc = {
@@ -252,13 +190,12 @@ v3dv_import_native_buffer_fd(VkDevice device_h,
.fd = os_dupfd_cloexec(native_buffer_fd),
};
- assert(image->plane_count == 1);
result =
v3dv_AllocateMemory(device_h,
&(VkMemoryAllocateInfo) {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = &import_info,
- .allocationSize = image->planes[0].size,
+ .allocationSize = lseek(native_buffer_fd, 0, SEEK_END),
.memoryTypeIndex = 0,
},
alloc, &memory_h);
@@ -274,8 +211,6 @@ v3dv_import_native_buffer_fd(VkDevice device_h,
};
v3dv_BindImageMemory2(device_h, 1, &bind_info);
- image->is_native_buffer_memory = true;
-
return VK_SUCCESS;
fail_create_image:
@@ -417,6 +352,193 @@ v3dv_GetSwapchainGrallocUsage2ANDROID(
*grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_HWCOMPOSER;
}
+ if (swapchainImageUsage & VK_SWAPCHAIN_IMAGE_USAGE_SHARED_BIT_ANDROID) {
+ uint64_t front_rendering_usage = 0;
+ u_gralloc_get_front_rendering_usage(device->gralloc, &front_rendering_usage);
+ *grallocProducerUsage |= front_rendering_usage;
+ }
+
return VK_SUCCESS;
}
#endif
+
+/* ----------------------------- AHardwareBuffer --------------------------- */
+
+static VkResult
+get_ahb_buffer_format_properties2(VkDevice device_h, const struct AHardwareBuffer *buffer,
+ VkAndroidHardwareBufferFormatProperties2ANDROID *pProperties)
+{
+ V3DV_FROM_HANDLE(v3dv_device, device, device_h);
+
+ /* Get a description of the buffer contents. */
+ AHardwareBuffer_Desc desc;
+ AHardwareBuffer_describe(buffer, &desc);
+
+ /* Verify description. */
+ const uint64_t gpu_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE |
+ AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT |
+ AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
+
+ /* "Buffer must be a valid Android hardware buffer object with at least
+ * one of the AHARDWAREBUFFER_USAGE_GPU_* usage flags."
+ */
+ if (!(desc.usage & (gpu_usage)))
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+
+ /* Fill properties fields based on description. */
+ VkAndroidHardwareBufferFormatProperties2ANDROID *p = pProperties;
+
+ p->samplerYcbcrConversionComponents.r = VK_COMPONENT_SWIZZLE_IDENTITY;
+ p->samplerYcbcrConversionComponents.g = VK_COMPONENT_SWIZZLE_IDENTITY;
+ p->samplerYcbcrConversionComponents.b = VK_COMPONENT_SWIZZLE_IDENTITY;
+ p->samplerYcbcrConversionComponents.a = VK_COMPONENT_SWIZZLE_IDENTITY;
+
+ p->suggestedYcbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601;
+ p->suggestedYcbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_FULL;
+
+ p->suggestedXChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
+ p->suggestedYChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
+
+ VkFormatProperties2 format_properties = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2};
+
+ p->format = vk_ahb_format_to_image_format(desc.format);
+
+ VkFormat external_format = p->format;
+
+ if (p->format != VK_FORMAT_UNDEFINED)
+ goto finish;
+
+ /* External format only case
+ *
+ * From vkGetAndroidHardwareBufferPropertiesANDROID spec:
+ * "If the Android hardware buffer has one of the formats listed in the Format
+ * Equivalence table (see spec.), then format must have the equivalent Vulkan
+ * format listed in the table. Otherwise, format may be VK_FORMAT_UNDEFINED,
+ * indicating the Android hardware buffer can only be used with an external format."
+ *
+ * From SKIA source code analysis: p->format MUST be VK_FORMAT_UNDEFINED if the
+ * format is not in the Equivalence table.
+ */
+
+ struct u_gralloc_buffer_handle gr_handle = {
+ .handle = AHardwareBuffer_getNativeHandle(buffer),
+ .pixel_stride = desc.stride,
+ .hal_format = desc.format,
+ };
+
+ struct u_gralloc_buffer_basic_info info;
+
+ if (u_gralloc_get_buffer_basic_info(device->gralloc, &gr_handle, &info) != 0)
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+
+ switch (info.drm_fourcc) {
+ case DRM_FORMAT_YVU420:
+ /* Assuming that U and V planes are swapped earlier */
+ external_format = VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM;
+ break;
+ case DRM_FORMAT_NV12:
+ external_format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
+ break;
+ default:;
+ mesa_loge("Unsupported external DRM format: %d", info.drm_fourcc);
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ }
+
+ struct u_gralloc_buffer_color_info color_info;
+ if (u_gralloc_get_buffer_color_info(device->gralloc, &gr_handle, &color_info) == 0) {
+ switch (color_info.yuv_color_space) {
+ case __DRI_YUV_COLOR_SPACE_ITU_REC601:
+ p->suggestedYcbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601;
+ break;
+ case __DRI_YUV_COLOR_SPACE_ITU_REC709:
+ p->suggestedYcbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709;
+ break;
+ case __DRI_YUV_COLOR_SPACE_ITU_REC2020:
+ p->suggestedYcbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020;
+ break;
+ default:
+ break;
+ }
+
+ p->suggestedYcbcrRange = (color_info.sample_range == __DRI_YUV_NARROW_RANGE) ?
+ VK_SAMPLER_YCBCR_RANGE_ITU_NARROW : VK_SAMPLER_YCBCR_RANGE_ITU_FULL;
+ p->suggestedXChromaOffset = (color_info.horizontal_siting == __DRI_YUV_CHROMA_SITING_0_5) ?
+ VK_CHROMA_LOCATION_MIDPOINT : VK_CHROMA_LOCATION_COSITED_EVEN;
+ p->suggestedYChromaOffset = (color_info.vertical_siting == __DRI_YUV_CHROMA_SITING_0_5) ?
+ VK_CHROMA_LOCATION_MIDPOINT : VK_CHROMA_LOCATION_COSITED_EVEN;
+ }
+
+finish:
+
+ v3dv_GetPhysicalDeviceFormatProperties2(v3dv_physical_device_to_handle(device->pdevice),
+ external_format, &format_properties);
+
+ /* v3dv doesn't support direct sampling from linear images but has logic to copy
+ * from linear to tiled images implicitly before sampling. Therefore expose optimal
+ * features for both linear and optimal tiling.
+ */
+ p->formatFeatures = format_properties.formatProperties.optimalTilingFeatures;
+ p->externalFormat = external_format;
+
+ /* From vkGetAndroidHardwareBufferPropertiesANDROID spec:
+ * "The formatFeatures member *must* include
+ * VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT and at least one of
+ * VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT or
+ * VK_FORMAT_FEATURE_2_COSITED_CHROMA_SAMPLES_BIT"
+ */
+ p->formatFeatures |= VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT_KHR;
+
+ return VK_SUCCESS;
+}
+
+VkResult
+v3dv_GetAndroidHardwareBufferPropertiesANDROID(VkDevice device_h,
+ const struct AHardwareBuffer *buffer,
+ VkAndroidHardwareBufferPropertiesANDROID *pProperties)
+{
+ V3DV_FROM_HANDLE(v3dv_device, dev, device_h);
+ struct v3dv_physical_device *pdevice = dev->pdevice;
+
+ VkResult result;
+
+ VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop =
+ vk_find_struct(pProperties->pNext, ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID);
+
+ /* Fill format properties of an Android hardware buffer. */
+ if (format_prop) {
+ VkAndroidHardwareBufferFormatProperties2ANDROID format_prop2 = {
+ .sType = VK_STRUCTURE_TYPE_ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_2_ANDROID,
+ };
+ result = get_ahb_buffer_format_properties2(device_h, buffer, &format_prop2);
+ if (result != VK_SUCCESS)
+ return result;
+
+ format_prop->format = format_prop2.format;
+ format_prop->externalFormat = format_prop2.externalFormat;
+ format_prop->formatFeatures =
+ vk_format_features2_to_features(format_prop2.formatFeatures);
+ format_prop->samplerYcbcrConversionComponents =
+ format_prop2.samplerYcbcrConversionComponents;
+ format_prop->suggestedYcbcrModel = format_prop2.suggestedYcbcrModel;
+ format_prop->suggestedYcbcrRange = format_prop2.suggestedYcbcrRange;
+ format_prop->suggestedXChromaOffset = format_prop2.suggestedXChromaOffset;
+ format_prop->suggestedYChromaOffset = format_prop2.suggestedYChromaOffset;
+ }
+
+ VkAndroidHardwareBufferFormatProperties2ANDROID *format_prop2 =
+ vk_find_struct(pProperties->pNext, ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_2_ANDROID);
+ if (format_prop2) {
+ result = get_ahb_buffer_format_properties2(device_h, buffer, format_prop2);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ const native_handle_t *handle = AHardwareBuffer_getNativeHandle(buffer);
+ assert(handle && handle->numFds > 0);
+ pProperties->allocationSize = lseek(handle->data[0], 0, SEEK_END);
+
+ /* All memory types. */
+ pProperties->memoryTypeBits = (1u << pdevice->memory.memoryTypeCount) - 1;
+
+ return VK_SUCCESS;
+}
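
On the application side these entry points are reached through the standard VK_ANDROID_external_memory_android_hardware_buffer flow. A minimal usage sketch, assuming a valid VkDevice device, AHardwareBuffer *ahb and VkDeviceMemory memory; error handling elided:

#include <strings.h> /* ffs() */

VkAndroidHardwareBufferFormatPropertiesANDROID fmt_props = {
   .sType = VK_STRUCTURE_TYPE_ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID,
};
VkAndroidHardwareBufferPropertiesANDROID props = {
   .sType = VK_STRUCTURE_TYPE_ANDROID_HARDWARE_BUFFER_PROPERTIES_ANDROID,
   .pNext = &fmt_props, /* filled via get_ahb_buffer_format_properties2() */
};
vkGetAndroidHardwareBufferPropertiesANDROID(device, ahb, &props);

/* The reported allocationSize and memoryTypeBits feed the import: */
const VkImportAndroidHardwareBufferInfoANDROID import_info = {
   .sType = VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID,
   .buffer = ahb,
};
const VkMemoryAllocateInfo alloc_info = {
   .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
   .pNext = &import_info,
   .allocationSize = props.allocationSize,
   .memoryTypeIndex = ffs(props.memoryTypeBits) - 1, /* any reported type */
};
vkAllocateMemory(device, &alloc_info, NULL, &memory);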
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_bo.c b/lib/mesa/src/broadcom/vulkan/v3dv_bo.c
index 9f1bf423a..2728a9233 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_bo.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_bo.c
@@ -339,7 +339,7 @@ v3dv_bo_map(struct v3dv_device *device, struct v3dv_bo *bo, uint32_t size)
if (!ok)
return false;
- ok = v3dv_bo_wait(device, bo, PIPE_TIMEOUT_INFINITE);
+ ok = v3dv_bo_wait(device, bo, OS_TIMEOUT_INFINITE);
if (!ok) {
fprintf(stderr, "memory wait for map failed\n");
return false;
@@ -359,7 +359,7 @@ v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo)
bo->map_size = 0;
}
-static boolean
+static bool
reallocate_size_list(struct v3dv_bo_cache *cache,
struct v3dv_device *device,
uint32_t size)
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_cl.h b/lib/mesa/src/broadcom/vulkan/v3dv_cl.h
index db1dfb08c..7e17ac395 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_cl.h
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_cl.h
@@ -27,6 +27,7 @@
#include "broadcom/cle/v3d_packet_helpers.h"
#include "util/list.h"
+#include "util/macros.h"
struct v3dv_bo;
struct v3dv_job;
@@ -150,15 +151,9 @@ cl_aligned_reloc(struct v3dv_cl *cl,
uint32_t v3dv_cl_ensure_space(struct v3dv_cl *cl, uint32_t space, uint32_t alignment);
void v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space);
-/* We redefine ALIGN as a macro as we want to use cl_aligned_packet_length for
- * struct fields
- */
-#define ALIGN(value, alignment) \
- (((value) + (alignment) - 1) & ~((alignment) - 1))
-
#define cl_packet_header(packet) V3DX(packet ## _header)
#define cl_packet_length(packet) V3DX(packet ## _length)
-#define cl_aligned_packet_length(packet, alignment) ALIGN(cl_packet_length(packet), alignment)
+#define cl_aligned_packet_length(packet, alignment) ALIGN_POT(cl_packet_length(packet), alignment)
#define cl_packet_pack(packet) V3DX(packet ## _pack)
#define cl_packet_struct(packet) V3DX(packet)
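
ALIGN_POT from util/macros.h keeps the semantics of the removed local ALIGN macro for power-of-two alignments, which is what packet lengths use, so the substitution is behavior-preserving. For reference, the arithmetic both macros share (matching the removed definition above):

/* Round value up to the next multiple of a power-of-two alignment.
 * ALIGN_POT(9, 8) == 16, ALIGN_POT(16, 8) == 16.
 */
#define ALIGN_POT(value, alignment) \
   (((value) + (alignment) - 1) & ~((alignment) - 1))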
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c b/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c
index 449e532c6..dc01a0fa0 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -24,6 +24,7 @@
#include "v3dv_private.h"
#include "util/u_pack_color.h"
#include "vk_util.h"
+#include "vulkan/runtime/vk_common_entrypoints.h"
void
v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo)
@@ -348,6 +349,7 @@ job_compute_frame_tiling(struct v3dv_job *job,
uint32_t layers,
uint32_t render_target_count,
uint8_t max_internal_bpp,
+ uint8_t total_color_bpp,
bool msaa,
bool double_buffer)
{
@@ -360,13 +362,16 @@ job_compute_frame_tiling(struct v3dv_job *job,
tiling->render_target_count = render_target_count;
tiling->msaa = msaa;
tiling->internal_bpp = max_internal_bpp;
+ tiling->total_color_bpp = total_color_bpp;
tiling->double_buffer = double_buffer;
/* Double-buffer is incompatible with MSAA */
assert(!tiling->msaa || !tiling->double_buffer);
- v3d_choose_tile_size(render_target_count, max_internal_bpp,
- tiling->msaa, tiling->double_buffer,
+ v3d_choose_tile_size(&job->device->devinfo,
+ render_target_count,
+ max_internal_bpp, total_color_bpp, msaa,
+ tiling->double_buffer,
&tiling->tile_width, &tiling->tile_height);
tiling->draw_tiles_x = DIV_ROUND_UP(width, tiling->tile_width);
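
A worked example of the tile-count math, assuming v3d_choose_tile_size settles on 64x64 tiles (the real size depends on render target count, bpp, MSAA and double-buffer) for a 1920x1080 framebuffer:

/* 1920 / 64 = 30 exactly; 1080 / 64 = 16.875, rounded up to 17. */
uint32_t draw_tiles_x = DIV_ROUND_UP(1920, 64); /* 30 */
uint32_t draw_tiles_y = DIV_ROUND_UP(1080, 64); /* 17 */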
@@ -457,6 +462,7 @@ v3dv_job_start_frame(struct v3dv_job *job,
bool allocate_tile_state_now,
uint32_t render_target_count,
uint8_t max_internal_bpp,
+ uint8_t total_color_bpp,
bool msaa)
{
assert(job);
@@ -467,7 +473,7 @@ v3dv_job_start_frame(struct v3dv_job *job,
const struct v3dv_frame_tiling *tiling =
job_compute_frame_tiling(job, width, height, layers,
render_target_count, max_internal_bpp,
- msaa, false);
+ total_color_bpp, msaa, false);
v3dv_cl_ensure_space_with_branch(&job->bcl, 256);
v3dv_return_if_oom(NULL, job);
@@ -528,6 +534,7 @@ cmd_buffer_end_render_pass_frame(struct v3dv_cmd_buffer *cmd_buffer)
job->frame_tiling.layers,
job->frame_tiling.render_target_count,
job->frame_tiling.internal_bpp,
+ job->frame_tiling.total_color_bpp,
job->frame_tiling.msaa,
true);
@@ -1374,7 +1381,7 @@ cmd_buffer_emit_subpass_clears(struct v3dv_cmd_buffer *cmd_buffer)
}
uint32_t att_count = 0;
- VkClearAttachment atts[V3D_MAX_DRAW_BUFFERS + 1]; /* 4 color + D/S */
+ VkClearAttachment atts[V3D_MAX_DRAW_BUFFERS + 1]; /* +1 for D/S */
/* We only need to emit subpass clears as draw calls for color attachments
* if the render area is not aligned to tile boundaries.
@@ -1434,7 +1441,7 @@ cmd_buffer_emit_subpass_clears(struct v3dv_cmd_buffer *cmd_buffer)
"VK_ATTACHMENT_LOAD_OP_CLEAR.\n");
} else if (subpass->do_depth_clear_with_draw ||
subpass->do_stencil_clear_with_draw) {
- perf_debug("Subpass clears DEPTH but loads STENCIL (or viceversa), "
+ perf_debug("Subpass clears DEPTH but loads STENCIL (or vice versa), "
"falling back to vkCmdClearAttachments for "
"VK_ATTACHMENT_LOAD_OP_CLEAR.\n");
}
@@ -1672,10 +1679,11 @@ cmd_buffer_subpass_create_job(struct v3dv_cmd_buffer *cmd_buffer,
const struct v3dv_framebuffer *framebuffer = state->framebuffer;
- uint8_t internal_bpp;
+ uint8_t max_internal_bpp, total_color_bpp;
bool msaa;
v3dv_X(job->device, framebuffer_compute_internal_bpp_msaa)
- (framebuffer, state->attachments, subpass, &internal_bpp, &msaa);
+ (framebuffer, state->attachments, subpass,
+ &max_internal_bpp, &total_color_bpp, &msaa);
/* From the Vulkan spec:
*
@@ -1699,7 +1707,8 @@ cmd_buffer_subpass_create_job(struct v3dv_cmd_buffer *cmd_buffer,
layers,
true, false,
subpass->color_count,
- internal_bpp,
+ max_internal_bpp,
+ total_color_bpp,
msaa);
}
@@ -2062,6 +2071,14 @@ cmd_buffer_bind_pipeline_static_state(struct v3dv_cmd_buffer *cmd_buffer,
}
}
+ if (!(dynamic_mask & V3DV_DYNAMIC_DEPTH_BOUNDS)) {
+ if (memcmp(&dest->depth_bounds, &src->depth_bounds,
+ sizeof(src->depth_bounds))) {
+ memcpy(&dest->depth_bounds, &src->depth_bounds, sizeof(src->depth_bounds));
+ dirty |= V3DV_CMD_DIRTY_DEPTH_BOUNDS;
+ }
+ }
+
if (!(dynamic_mask & V3DV_DYNAMIC_LINE_WIDTH)) {
if (dest->line_width != src->line_width) {
dest->line_width = src->line_width;
@@ -2131,39 +2148,6 @@ v3dv_CmdBindPipeline(VkCommandBuffer commandBuffer,
}
}
-/* FIXME: C&P from radv. tu has similar code. Perhaps common place? */
-void
-v3dv_viewport_compute_xform(const VkViewport *viewport,
- float scale[3],
- float translate[3])
-{
- float x = viewport->x;
- float y = viewport->y;
- float half_width = 0.5f * viewport->width;
- float half_height = 0.5f * viewport->height;
- double n = viewport->minDepth;
- double f = viewport->maxDepth;
-
- scale[0] = half_width;
- translate[0] = half_width + x;
- scale[1] = half_height;
- translate[1] = half_height + y;
-
- scale[2] = (f - n);
- translate[2] = n;
-
- /* It seems that if the scale is small enough the hardware won't clip
- * correctly so we work around this my choosing the smallest scale that
- * seems to work.
- *
- * This case is exercised by CTS:
- * dEQP-VK.draw.inverted_depth_ranges.nodepthclamp_deltazero
- */
- const float min_abs_scale = 0.000009f;
- if (fabs(scale[2]) < min_abs_scale)
- scale[2] = min_abs_scale * (scale[2] < 0 ? -1.0f : 1.0f);
-}
-
/* Considers the pipeline's negative_one_to_one state and applies it to the
* current viewport transform if needed to produce the resulting Z translate
* and scale parameters.
@@ -2216,9 +2200,10 @@ v3dv_CmdSetViewport(VkCommandBuffer commandBuffer,
viewportCount * sizeof(*pViewports));
for (uint32_t i = firstViewport; i < total_count; i++) {
- v3dv_viewport_compute_xform(&state->dynamic.viewport.viewports[i],
- state->dynamic.viewport.scale[i],
- state->dynamic.viewport.translate[i]);
+ v3dv_X(cmd_buffer->device, viewport_compute_xform)
+ (&state->dynamic.viewport.viewports[i],
+ state->dynamic.viewport.scale[i],
+ state->dynamic.viewport.translate[i]);
}
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VIEWPORT;
@@ -2264,11 +2249,14 @@ emit_scissor(struct v3dv_cmd_buffer *cmd_buffer)
*/
float *vptranslate = dynamic->viewport.translate[0];
float *vpscale = dynamic->viewport.scale[0];
+ assert(vpscale[0] >= 0);
- float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
- float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
- float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
- float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
+ float vp_minx = vptranslate[0] - vpscale[0];
+ float vp_maxx = vptranslate[0] + vpscale[0];
+
+ /* With VK_KHR_maintenance1 the viewport may have a negative Y scale */
+ float vp_miny = vptranslate[1] - fabsf(vpscale[1]);
+ float vp_maxy = vptranslate[1] + fabsf(vpscale[1]);
/* Quoting from v3dx_emit:
* "Clip to the scissor if it's enabled, but still clip to the
@@ -2297,11 +2285,6 @@ emit_scissor(struct v3dv_cmd_buffer *cmd_buffer)
maxy = MIN2(vp_maxy, cmd_buffer->state.render_area.offset.y +
cmd_buffer->state.render_area.extent.height);
- minx = vp_minx;
- miny = vp_miny;
- maxx = vp_maxx;
- maxy = vp_maxy;
-
/* Clip against user provided scissor if needed.
*
* FIXME: right now we only allow one scissor. Below would need to be
@@ -2701,6 +2684,7 @@ cmd_buffer_restart_job_for_msaa_if_needed(struct v3dv_cmd_buffer *cmd_buffer)
true, false,
old_job->frame_tiling.render_target_count,
old_job->frame_tiling.internal_bpp,
+ old_job->frame_tiling.total_color_bpp,
true /* msaa */);
v3dv_job_destroy(old_job);
@@ -2965,6 +2949,9 @@ v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer,
if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_DEPTH_BIAS))
v3dv_X(device, cmd_buffer_emit_depth_bias)(cmd_buffer);
+ if (*dirty & V3DV_CMD_DIRTY_DEPTH_BOUNDS)
+ v3dv_X(device, cmd_buffer_emit_depth_bounds)(cmd_buffer);
+
if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_BLEND_CONSTANTS))
v3dv_X(device, cmd_buffer_emit_blend)(cmd_buffer);
@@ -3309,24 +3296,6 @@ v3dv_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VERTEX_BUFFER;
}
-static uint32_t
-get_index_size(VkIndexType index_type)
-{
- switch (index_type) {
- case VK_INDEX_TYPE_UINT8_EXT:
- return 1;
- break;
- case VK_INDEX_TYPE_UINT16:
- return 2;
- break;
- case VK_INDEX_TYPE_UINT32:
- return 4;
- break;
- default:
- unreachable("Unsupported index type");
- }
-}
-
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
VkBuffer buffer,
@@ -3335,7 +3304,7 @@ v3dv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- const uint32_t index_size = get_index_size(indexType);
+ const uint32_t index_size = vk_index_type_to_bytes(indexType);
if (buffer == cmd_buffer->state.index_buffer.buffer &&
offset == cmd_buffer->state.index_buffer.offset &&
index_size == cmd_buffer->state.index_buffer.index_size) {
@@ -3412,9 +3381,11 @@ v3dv_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
float minDepthBounds,
float maxDepthBounds)
{
- /* We do not support depth bounds testing so we just ignore this. We are
- * already asserting that pipelines don't enable the feature anyway.
- */
+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds;
+ cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds;
+ cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DEPTH_BOUNDS;
}
VKAPI_ATTR void VKAPI_CALL
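
With the stub replaced, the bounds are recorded as dynamic state and flushed by cmd_buffer_emit_depth_bounds from v3dv_cmd_buffer_emit_pre_draw; the feature itself is only advertised on V3D 7.1+ (see the depthBounds feature bit in the v3dv_device.c hunks below). Application-side usage for reference, assuming a command buffer handle cmd:

/* Requires the depthBounds feature; fragments whose stored depth falls
 * outside [0.25, 0.75] fail the depth bounds test.
 */
vkCmdSetDepthBounds(cmd, 0.25f, 0.75f);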
@@ -3435,6 +3406,304 @@ v3dv_CmdSetLineWidth(VkCommandBuffer commandBuffer,
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_LINE_WIDTH;
}
+/**
+ * This checks a descriptor set to see if we are binding any descriptors that would
+ * involve sampling from a linear image (the hardware only supports this for
+ * 1D images), and if so, attempts to create a tiled copy of the linear image
+ * and rewrite the descriptor set to use that instead.
+ *
+ * This was added to support a scenario with Android where some part of the UI
+ * wanted to show previews of linear swapchain images. For more details:
+ * https://gitlab.freedesktop.org/mesa/mesa/-/issues/9712
+ *
+ * Currently this only supports linear sampling from a simple 2D image, but
+ * it could be extended to support more cases if necessary.
+ */
+static void
+handle_sample_from_linear_image(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_descriptor_set *set,
+ bool is_compute)
+{
+ for (int32_t i = 0; i < set->layout->binding_count; i++) {
+ const struct v3dv_descriptor_set_binding_layout *blayout =
+ &set->layout->binding[i];
+ if (blayout->type != VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE &&
+ blayout->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ continue;
+
+ struct v3dv_descriptor *desc = &set->descriptors[blayout->descriptor_index];
+ if (!desc->image_view)
+ continue;
+
+ struct v3dv_image *image = (struct v3dv_image *) desc->image_view->vk.image;
+ struct v3dv_image_view *view = (struct v3dv_image_view *) desc->image_view;
+ if (image->tiled || view->vk.view_type == VK_IMAGE_VIEW_TYPE_1D ||
+ view->vk.view_type == VK_IMAGE_VIEW_TYPE_1D_ARRAY) {
+ continue;
+ }
+
+ /* FIXME: we can probably handle most of these restrictions too with
+ * a bit of extra effort.
+ */
+ if (view->vk.view_type != VK_IMAGE_VIEW_TYPE_2D ||
+ view->vk.level_count != 1 || view->vk.layer_count != 1 ||
+ blayout->array_size != 1) {
+ fprintf(stderr, "Sampling from linear image is not supported. "
+ "Expect corruption.\n");
+ continue;
+ }
+
+ /* We are sampling from a linear image. V3D doesn't support this
+ * so we create a tiled copy of the image and rewrite the descriptor
+ * to read from it instead.
+ */
+ perf_debug("Sampling from linear image is not supported natively and "
+ "requires a copy.\n");
+
+ struct v3dv_device *device = cmd_buffer->device;
+ VkDevice vk_device = v3dv_device_to_handle(device);
+
+ /* Allocate the shadow tiled image if needed; we only do this once for
+ * each image, on the first sampling attempt. We need to take a lock
+ * since we may be trying to do the same in another command buffer in
+ * a separate thread.
+ */
+ mtx_lock(&device->meta.mtx);
+ VkResult result;
+ VkImage tiled_image;
+ if (image->shadow) {
+ tiled_image = v3dv_image_to_handle(image->shadow);
+ } else {
+ VkImageCreateInfo image_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .flags = image->vk.create_flags,
+ .imageType = image->vk.image_type,
+ .format = image->vk.format,
+ .extent = {
+ image->vk.extent.width,
+ image->vk.extent.height,
+ image->vk.extent.depth,
+ },
+ .mipLevels = image->vk.mip_levels,
+ .arrayLayers = image->vk.array_layers,
+ .samples = image->vk.samples,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = image->vk.usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+ result = v3dv_CreateImage(vk_device, &image_info,
+ &device->vk.alloc, &tiled_image);
+ if (result != VK_SUCCESS) {
+ fprintf(stderr, "Failed to copy linear 2D image for sampling."
+ "Expect corruption.\n");
+ mtx_unlock(&device->meta.mtx);
+ continue;
+ }
+
+ bool disjoint = image->vk.create_flags & VK_IMAGE_CREATE_DISJOINT_BIT;
+ VkImageMemoryRequirementsInfo2 reqs_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
+ .image = tiled_image,
+ };
+
+ assert(image->plane_count <= V3DV_MAX_PLANE_COUNT);
+ for (int p = 0; p < (disjoint ? image->plane_count : 1); p++) {
+ VkImageAspectFlagBits plane_aspect = VK_IMAGE_ASPECT_PLANE_0_BIT << p;
+ VkImagePlaneMemoryRequirementsInfo plane_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO,
+ .planeAspect = plane_aspect,
+ };
+ if (disjoint)
+ reqs_info.pNext = &plane_info;
+
+ VkMemoryRequirements2 reqs = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
+ };
+ v3dv_GetImageMemoryRequirements2(vk_device, &reqs_info, &reqs);
+
+ VkDeviceMemory mem;
+ VkMemoryAllocateInfo alloc_info = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .allocationSize = reqs.memoryRequirements.size,
+ .memoryTypeIndex = 0,
+ };
+ result = v3dv_AllocateMemory(vk_device, &alloc_info,
+ &device->vk.alloc, &mem);
+ if (result != VK_SUCCESS) {
+ fprintf(stderr, "Failed to copy linear 2D image for sampling."
+ "Expect corruption.\n");
+ v3dv_DestroyImage(vk_device, tiled_image, &device->vk.alloc);
+ mtx_unlock(&device->meta.mtx);
+ continue;
+ }
+
+ VkBindImageMemoryInfo bind_info = {
+ .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
+ .image = tiled_image,
+ .memory = mem,
+ .memoryOffset = 0,
+ };
+ VkBindImagePlaneMemoryInfo plane_bind_info = {
+ .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO,
+ .planeAspect = plane_aspect,
+ };
+ if (disjoint)
+ bind_info.pNext = &plane_bind_info;
+ result = v3dv_BindImageMemory2(vk_device, 1, &bind_info);
+ if (result != VK_SUCCESS) {
+ fprintf(stderr, "Failed to copy linear 2D image for sampling."
+ "Expect corruption.\n");
+ v3dv_DestroyImage(vk_device, tiled_image, &device->vk.alloc);
+ v3dv_FreeMemory(vk_device, mem, &device->vk.alloc);
+ mtx_unlock(&device->meta.mtx);
+ continue;
+ }
+ }
+
+ image->shadow = v3dv_image_from_handle(tiled_image);
+ }
+
+ /* Create a shadow view that refers to the tiled image if needed */
+ VkImageView tiled_view;
+ if (view->shadow) {
+ tiled_view = v3dv_image_view_to_handle(view->shadow);
+ } else {
+ VkImageViewCreateInfo view_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .flags = view->vk.create_flags,
+ .image = tiled_image,
+ .viewType = view->vk.view_type,
+ .format = view->vk.format,
+ .components = view->vk.swizzle,
+ .subresourceRange = {
+ .aspectMask = view->vk.aspects,
+ .baseMipLevel = view->vk.base_mip_level,
+ .levelCount = view->vk.level_count,
+ .baseArrayLayer = view->vk.base_array_layer,
+ .layerCount = view->vk.layer_count,
+ },
+ };
+ result = v3dv_create_image_view(device, &view_info, &tiled_view);
+ if (result != VK_SUCCESS) {
+ fprintf(stderr, "Failed to copy linear 2D image for sampling."
+ "Expect corruption.\n");
+ mtx_unlock(&device->meta.mtx);
+ continue;
+ }
+ }
+
+ view->shadow = v3dv_image_view_from_handle(tiled_view);
+
+ mtx_unlock(&device->meta.mtx);
+
+ /* Rewrite the descriptor to use the shadow view */
+ VkDescriptorImageInfo desc_image_info = {
+ .sampler = v3dv_sampler_to_handle(desc->sampler),
+ .imageView = tiled_view,
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+ VkWriteDescriptorSet write = {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = v3dv_descriptor_set_to_handle(set),
+ .dstBinding = i,
+ .dstArrayElement = 0, /* Assumes array_size is 1 */
+ .descriptorCount = 1,
+ .descriptorType = desc->type,
+ .pImageInfo = &desc_image_info,
+ };
+ v3dv_UpdateDescriptorSets(vk_device, 1, &write, 0, NULL);
+
+ /* Now we need to actually copy the pixel data from the linear image
+ * into the tiled image storage to ensure it is up-to-date.
+ *
+ * FIXME: ideally we would track if the linear image is dirty and skip
+ * this step otherwise, but that would be a bit of a pain.
+ *
+ * Note that we need to place the copy job *before* the current job in
+ * the command buffer state so we have the tiled image ready to process
+ * an upcoming draw call in the current job that samples from it.
+ *
+ * Also, we need to use the TFU path for this copy, as any other path
+ * will use the tile buffer and would require a new framebuffer setup,
+ * thus requiring extra work to stop and resume any in-flight render
+ * pass. Since we are converting a full 2D texture here the TFU should
+ * be able to handle this.
+ */
+ for (int p = 0; p < image->plane_count; p++) {
+ VkImageAspectFlagBits plane_aspect = VK_IMAGE_ASPECT_PLANE_0_BIT << p;
+ struct VkImageCopy2 copy_region = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2,
+ .srcSubresource = {
+ .aspectMask = image->plane_count == 1 ?
+ view->vk.aspects : (view->vk.aspects & plane_aspect),
+ .mipLevel = view->vk.base_mip_level,
+ .baseArrayLayer = view->vk.base_array_layer,
+ .layerCount = view->vk.layer_count,
+ },
+ .srcOffset = {0, 0, 0 },
+ .dstSubresource = {
+ .aspectMask = image->plane_count == 1 ?
+ view->vk.aspects : (view->vk.aspects & plane_aspect),
+ .mipLevel = view->vk.base_mip_level,
+ .baseArrayLayer = view->vk.base_array_layer,
+ .layerCount = view->vk.layer_count,
+ },
+ .dstOffset = { 0, 0, 0},
+ .extent = {
+ image->planes[p].width,
+ image->planes[p].height,
+ 1,
+ },
+ };
+ struct v3dv_image *copy_src = image;
+ struct v3dv_image *copy_dst = v3dv_image_from_handle(tiled_image);
+ bool ok = v3dv_cmd_buffer_copy_image_tfu(cmd_buffer, copy_dst, copy_src,
+ &copy_region);
+ if (ok) {
+ /* This will emit the TFU job right before the current in-flight
+ * job (if any), since in-fight jobs are only added to the list
+ * when finished.
+ */
+ struct v3dv_job *tfu_job =
+ list_last_entry(&cmd_buffer->jobs, struct v3dv_job, list_link);
+ assert(tfu_job->type == V3DV_JOB_TYPE_GPU_TFU);
+ /* Serialize the copy since we don't know who is producing the linear
+ * image and we need the image to be ready by the time the copy
+ * executes.
+ */
+ tfu_job->serialize = V3DV_BARRIER_ALL;
+
+ /* Also, we need to ensure the TFU copy job completes before anything
+ * else coming after that may be using the tiled shadow copy.
+ */
+ if (cmd_buffer->state.job) {
+ /* If we already had an in-flight job (i.e. we are in a render
+ * pass) make sure the job waits for the TFU copy.
+ */
+ cmd_buffer->state.job->serialize |= V3DV_BARRIER_TRANSFER_BIT;
+ } else {
+ /* Otherwise, make sure the follow-up job syncs with the TFU
+ * job we just added when it is created by adding the
+ * corresponding barrier state.
+ */
+ if (!is_compute) {
+ cmd_buffer->state.barrier.dst_mask |= V3DV_BARRIER_GRAPHICS_BIT;
+ cmd_buffer->state.barrier.src_mask_graphics |= V3DV_BARRIER_TRANSFER_BIT;
+ } else {
+ cmd_buffer->state.barrier.dst_mask |= V3DV_BARRIER_COMPUTE_BIT;
+ cmd_buffer->state.barrier.src_mask_compute |= V3DV_BARRIER_TRANSFER_BIT;
+ }
+ }
+ } else {
+ fprintf(stderr, "Failed to copy linear 2D image for sampling."
+ "TFU doesn't support copy. Expect corruption.\n");
+ }
+ }
+ }
+}
+
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
@@ -3468,6 +3737,15 @@ v3dv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
descriptor_state->descriptor_sets[index] = set;
dirty_stages |= set->layout->shader_stages;
descriptor_state_changed = true;
+
+ /* Check if we are sampling from a linear 2D image. This is not
+ * supported in hardware, but may be required for some applications
+ * so we will transparently convert to tiled at the expense of
+ * performance.
+ */
+ handle_sample_from_linear_image(cmd_buffer, set,
+ pipelineBindPoint ==
+ VK_PIPELINE_BIND_POINT_COMPUTE);
}
for (uint32_t j = 0; j < set->layout->dynamic_offset_count; j++, dyn_index++) {
@@ -3846,6 +4124,7 @@ cmd_buffer_emit_pre_dispatch(struct v3dv_cmd_buffer *cmd_buffer)
void
v3dv_cmd_buffer_rewrite_indirect_csd_job(
+ struct v3dv_device *device,
struct v3dv_csd_indirect_cpu_job_info *info,
const uint32_t *wg_counts)
{
@@ -3865,15 +4144,22 @@ v3dv_cmd_buffer_rewrite_indirect_csd_job(
submit->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
submit->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
- submit->cfg[4] = DIV_ROUND_UP(info->wg_size, 16) *
- (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1;
+ uint32_t num_batches = DIV_ROUND_UP(info->wg_size, 16) *
+ (wg_counts[0] * wg_counts[1] * wg_counts[2]);
+ /* V3D 7.1.6 and later don't subtract 1 from the number of batches */
+ if (device->devinfo.ver < 71 ||
+ (device->devinfo.ver == 71 && device->devinfo.rev < 6)) {
+ submit->cfg[4] = num_batches - 1;
+ } else {
+ submit->cfg[4] = num_batches;
+ }
assert(submit->cfg[4] != ~0);
if (info->needs_wg_uniform_rewrite) {
/* Make sure the GPU is not currently accessing the indirect CL for this
* job, since we are about to overwrite some of the uniform data.
*/
- v3dv_bo_wait(job->device, job->indirect.bo, PIPE_TIMEOUT_INFINITE);
+ v3dv_bo_wait(job->device, job->indirect.bo, OS_TIMEOUT_INFINITE);
for (uint32_t i = 0; i < 3; i++) {
if (info->wg_uniform_offsets[i]) {
@@ -3899,6 +4185,7 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t **wg_uniform_offsets_out,
uint32_t *wg_size_out)
{
+ struct v3dv_device *device = cmd_buffer->device;
struct v3dv_pipeline *pipeline = cmd_buffer->state.compute.pipeline;
assert(pipeline && pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
struct v3dv_shader_variant *cs_variant =
@@ -3957,18 +4244,26 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
if (wg_size_out)
*wg_size_out = wg_size;
- submit->cfg[4] = num_batches - 1;
+ /* V3D 7.1.6 and later don't subtract 1 from the number of batches */
+ if (device->devinfo.ver < 71 ||
+ (device->devinfo.ver == 71 && device->devinfo.rev < 6)) {
+ submit->cfg[4] = num_batches - 1;
+ } else {
+ submit->cfg[4] = num_batches;
+ }
assert(submit->cfg[4] != ~0);
assert(pipeline->shared_data->assembly_bo);
struct v3dv_bo *cs_assembly_bo = pipeline->shared_data->assembly_bo;
submit->cfg[5] = cs_assembly_bo->offset + cs_variant->assembly_offset;
- submit->cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS;
if (cs_variant->prog_data.base->single_seg)
submit->cfg[5] |= V3D_CSD_CFG5_SINGLE_SEG;
if (cs_variant->prog_data.base->threads == 4)
submit->cfg[5] |= V3D_CSD_CFG5_THREADING;
+ /* V3D 7.x has made the PROPAGATE_NANS bit in CFG5 reserved */
+ if (device->devinfo.ver < 71)
+ submit->cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS;
if (cs_variant->prog_data.cs->shared_size > 0) {
job->csd.shared_memory =
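
To make the cfg[4] change above concrete, a worked example assuming a 64-invocation workgroup dispatched as 8x4x2 groups (pre_716 is a hypothetical flag for hardware older than V3D 7.1.6):

/* 4 batches of 16 invocations per workgroup, 64 workgroups total. */
uint32_t num_batches = DIV_ROUND_UP(64, 16) * (8 * 4 * 2); /* 256 */
uint32_t cfg4 = pre_716 ? num_batches - 1 : num_batches;   /* 255 vs 256 */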
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_device.c b/lib/mesa/src/broadcom/vulkan/v3dv_device.c
index 0590afeb7..027c35ffe 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_device.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_device.c
@@ -49,9 +49,14 @@
#include "git_sha1.h"
#include "util/build_id.h"
+#include "util/os_file.h"
#include "util/u_debug.h"
#include "util/format/u_format.h"
+#ifdef ANDROID
+#include "vk_android.h"
+#endif
+
#ifdef VK_USE_PLATFORM_XCB_KHR
#include <xcb/xcb.h>
#include <xcb/dri3.h>
@@ -63,11 +68,14 @@
#include "wayland-drm-client-protocol.h"
#endif
-#ifndef ANDROID
-# define V3DV_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
-#else
-/* Android CDD require additional extensions for API v1.1+ */
-# define V3DV_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)
+#define V3DV_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
+
+#ifdef ANDROID
+#if ANDROID_API_LEVEL <= 32
+/* Android 12.1 and lower support only Vulkan API v1.1 */
+#undef V3DV_API_VERSION
+#define V3DV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
+#endif
#endif
VKAPI_ATTR VkResult VKAPI_CALL
@@ -156,9 +164,7 @@ get_device_extensions(const struct v3dv_physical_device *device,
.KHR_shader_float_controls = true,
.KHR_shader_non_semantic_info = true,
.KHR_sampler_mirror_clamp_to_edge = true,
-#ifndef ANDROID
.KHR_sampler_ycbcr_conversion = true,
-#endif
.KHR_spirv_1_4 = true,
.KHR_storage_buffer_storage_class = true,
.KHR_timeline_semaphore = true,
@@ -202,11 +208,237 @@ get_device_extensions(const struct v3dv_physical_device *device,
.EXT_tooling_info = true,
.EXT_vertex_attribute_divisor = true,
#ifdef ANDROID
+ .ANDROID_external_memory_android_hardware_buffer = true,
.ANDROID_native_buffer = true,
+ .EXT_queue_family_foreign = true,
#endif
};
}
+static void
+get_features(const struct v3dv_physical_device *physical_device,
+ struct vk_features *features)
+{
+ *features = (struct vk_features) {
+ /* Vulkan 1.0 */
+ .robustBufferAccess = true, /* This feature is mandatory */
+ .fullDrawIndexUint32 = physical_device->devinfo.ver >= 71,
+ .imageCubeArray = true,
+ .independentBlend = true,
+ .geometryShader = true,
+ .tessellationShader = false,
+ .sampleRateShading = true,
+ .dualSrcBlend = false,
+ .logicOp = true,
+ .multiDrawIndirect = false,
+ .drawIndirectFirstInstance = true,
+ .depthClamp = physical_device->devinfo.ver >= 71,
+ .depthBiasClamp = true,
+ .fillModeNonSolid = true,
+ .depthBounds = physical_device->devinfo.ver >= 71,
+ .wideLines = true,
+ .largePoints = true,
+ .alphaToOne = true,
+ .multiViewport = false,
+ .samplerAnisotropy = true,
+ .textureCompressionETC2 = true,
+ .textureCompressionASTC_LDR = true,
+ /* Note that textureCompressionBC requires that the driver support all
+ * the BC formats. V3D 4.2 only supports BC1-3, so we can't claim
+ * that we support it.
+ */
+ .textureCompressionBC = false,
+ .occlusionQueryPrecise = true,
+ .pipelineStatisticsQuery = false,
+ .vertexPipelineStoresAndAtomics = true,
+ .fragmentStoresAndAtomics = true,
+ .shaderTessellationAndGeometryPointSize = true,
+ .shaderImageGatherExtended = true,
+ .shaderStorageImageExtendedFormats = true,
+ .shaderStorageImageMultisample = false,
+ .shaderStorageImageReadWithoutFormat = true,
+ .shaderStorageImageWriteWithoutFormat = false,
+ .shaderUniformBufferArrayDynamicIndexing = false,
+ .shaderSampledImageArrayDynamicIndexing = false,
+ .shaderStorageBufferArrayDynamicIndexing = false,
+ .shaderStorageImageArrayDynamicIndexing = false,
+ .shaderClipDistance = true,
+ .shaderCullDistance = false,
+ .shaderFloat64 = false,
+ .shaderInt64 = false,
+ .shaderInt16 = false,
+ .shaderResourceResidency = false,
+ .shaderResourceMinLod = false,
+ .sparseBinding = false,
+ .sparseResidencyBuffer = false,
+ .sparseResidencyImage2D = false,
+ .sparseResidencyImage3D = false,
+ .sparseResidency2Samples = false,
+ .sparseResidency4Samples = false,
+ .sparseResidency8Samples = false,
+ .sparseResidency16Samples = false,
+ .sparseResidencyAliased = false,
+ .variableMultisampleRate = false,
+ .inheritedQueries = true,
+
+ /* Vulkan 1.1 */
+ .storageBuffer16BitAccess = true,
+ .uniformAndStorageBuffer16BitAccess = true,
+ .storagePushConstant16 = true,
+ .storageInputOutput16 = false,
+ .multiview = true,
+ .multiviewGeometryShader = false,
+ .multiviewTessellationShader = false,
+ .variablePointersStorageBuffer = true,
+ /* FIXME: this needs support for non-constant index on UBO/SSBO */
+ .variablePointers = false,
+ .protectedMemory = false,
+ .samplerYcbcrConversion = true,
+ .shaderDrawParameters = false,
+
+ /* Vulkan 1.2 */
+ .hostQueryReset = true,
+ .uniformAndStorageBuffer8BitAccess = true,
+ .uniformBufferStandardLayout = true,
+ /* V3D 4.2 wraps TMU vector accesses to 16-byte boundaries, so loads and
+ * stores of vectors that cross these boundaries would not work correctly
+ * with scalarBlockLayout and would need to be split into smaller vectors
+ * (and/or scalars) that don't cross these boundaries. For load/stores
+ * with dynamic offsets where we can't identify if the offset is
+ * problematic, we would always have to scalarize. Overall, this would
+ * not lead to best performance so let's just not support it.
+ */
+ .scalarBlockLayout = physical_device->devinfo.ver >= 71,
+ /* This tells applications two things:
+ *
+ * 1. If they can select just one aspect for barriers. For us barriers
+ * decide if we need to split a job and we don't care if it is only
+ * for one of the aspects of the image or both, so we don't really
+ * benefit from seeing barriers that select just one aspect.
+ *
+ * 2. If they can program different layouts for each aspect. We
+ * generally don't care about layouts, so again, we don't get any
+ * benefits from this to limit the scope of image layout transitions.
+ *
+ * Still, Vulkan 1.2 requires this feature to be supported so we
+ * advertise it even though we don't really take advantage of it.
+ */
+ .separateDepthStencilLayouts = true,
+ .storageBuffer8BitAccess = true,
+ .storagePushConstant8 = true,
+ .imagelessFramebuffer = true,
+ .timelineSemaphore = true,
+
+ .samplerMirrorClampToEdge = true,
+
+ /* These are mandatory by Vulkan 1.2, however, we don't support any of
+ * the optional features affected by them (non 32-bit types for
+ * shaderSubgroupExtendedTypes and additional subgroup ballot for
+ * subgroupBroadcastDynamicId), so in practice setting them to true
+ * doesn't have any implications for us until we implement any of these
+ * optional features.
+ */
+ .shaderSubgroupExtendedTypes = true,
+ .subgroupBroadcastDynamicId = true,
+
+ .vulkanMemoryModel = true,
+ .vulkanMemoryModelDeviceScope = true,
+ .vulkanMemoryModelAvailabilityVisibilityChains = true,
+
+ .bufferDeviceAddress = true,
+ .bufferDeviceAddressCaptureReplay = false,
+ .bufferDeviceAddressMultiDevice = false,
+
+ /* Vulkan 1.3 */
+ .inlineUniformBlock = true,
+ /* Inline buffers work like push constants, so after they are bound
+ * some of their contents may be copied into the uniform stream as soon
+ * as the next draw/dispatch is recorded in the command buffer. This means
+ * that if the client updates the buffer contents after binding it to
+ * a command buffer, the next queue submit of that command buffer may
+ * not use the latest update to the buffer contents, but the data that
+ * was present in the buffer at the time it was bound to the command
+ * buffer.
+ */
+ .descriptorBindingInlineUniformBlockUpdateAfterBind = false,
+ .pipelineCreationCacheControl = true,
+ .privateData = true,
+ .maintenance4 = true,
+ .shaderZeroInitializeWorkgroupMemory = true,
+ .synchronization2 = true,
+ .robustImageAccess = true,
+ .shaderIntegerDotProduct = true,
+
+ /* VK_EXT_4444_formats */
+ .formatA4R4G4B4 = true,
+ .formatA4B4G4R4 = true,
+
+ /* VK_EXT_custom_border_color */
+ .customBorderColors = true,
+ .customBorderColorWithoutFormat = false,
+
+ /* VK_EXT_index_type_uint8 */
+ .indexTypeUint8 = true,
+
+ /* VK_EXT_line_rasterization */
+ .rectangularLines = true,
+ .bresenhamLines = true,
+ .smoothLines = false,
+ .stippledRectangularLines = false,
+ .stippledBresenhamLines = false,
+ .stippledSmoothLines = false,
+
+ /* VK_EXT_color_write_enable */
+ .colorWriteEnable = true,
+
+ /* VK_KHR_pipeline_executable_properties */
+ .pipelineExecutableInfo = true,
+
+ /* VK_EXT_provoking_vertex */
+ .provokingVertexLast = true,
+ /* FIXME: update when supporting EXT_transform_feedback */
+ .transformFeedbackPreservesProvokingVertex = false,
+
+ /* VK_EXT_vertex_attribute_divisor */
+ .vertexAttributeInstanceRateDivisor = true,
+ .vertexAttributeInstanceRateZeroDivisor = false,
+
+ /* VK_KHR_performance_query */
+ .performanceCounterQueryPools = physical_device->caps.perfmon,
+ .performanceCounterMultipleQueryPools = false,
+
+ /* VK_EXT_texel_buffer_alignment */
+ .texelBufferAlignment = true,
+
+ /* VK_KHR_workgroup_memory_explicit_layout */
+ .workgroupMemoryExplicitLayout = true,
+ .workgroupMemoryExplicitLayoutScalarBlockLayout = false,
+ .workgroupMemoryExplicitLayout8BitAccess = true,
+ .workgroupMemoryExplicitLayout16BitAccess = true,
+
+ /* VK_EXT_border_color_swizzle */
+ .borderColorSwizzle = true,
+ .borderColorSwizzleFromImage = true,
+
+ /* VK_EXT_shader_module_identifier */
+ .shaderModuleIdentifier = true,
+
+ /* VK_EXT_depth_clip_control */
+ .depthClipControl = true,
+
+ /* VK_EXT_attachment_feedback_loop_layout */
+ .attachmentFeedbackLoopLayout = true,
+
+ /* VK_EXT_primitive_topology_list_restart */
+ .primitiveTopologyListRestart = true,
+ /* FIXME: we don't support tessellation shaders yet */
+ .primitiveTopologyPatchListRestart = false,
+
+ /* VK_EXT_pipeline_robustness */
+ .pipelineRobustness = true,
+ };
+}
+
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_EnumerateInstanceExtensionProperties(const char *pLayerName,
uint32_t *pPropertyCount,
@@ -321,8 +553,6 @@ physical_device_finish(struct v3dv_physical_device *device)
close(device->render_fd);
if (device->display_fd >= 0)
close(device->display_fd);
- if (device->master_fd >= 0)
- close(device->master_fd);
free(device->name);
@@ -404,273 +634,6 @@ compute_memory_budget(struct v3dv_physical_device *device)
return MIN2(heap_size, heap_used + heap_available);
}
-#if !using_v3d_simulator
-#ifdef VK_USE_PLATFORM_XCB_KHR
-static int
-create_display_fd_xcb(VkIcdSurfaceBase *surface)
-{
- int fd = -1;
-
- xcb_connection_t *conn;
- xcb_dri3_open_reply_t *reply = NULL;
- if (surface) {
- if (surface->platform == VK_ICD_WSI_PLATFORM_XLIB)
- conn = XGetXCBConnection(((VkIcdSurfaceXlib *)surface)->dpy);
- else
- conn = ((VkIcdSurfaceXcb *)surface)->connection;
- } else {
- conn = xcb_connect(NULL, NULL);
- }
-
- if (xcb_connection_has_error(conn))
- goto finish;
-
- const xcb_setup_t *setup = xcb_get_setup(conn);
- xcb_screen_iterator_t iter = xcb_setup_roots_iterator(setup);
- xcb_screen_t *screen = iter.data;
-
- xcb_dri3_open_cookie_t cookie;
- cookie = xcb_dri3_open(conn, screen->root, None);
- reply = xcb_dri3_open_reply(conn, cookie, NULL);
- if (!reply)
- goto finish;
-
- if (reply->nfd != 1)
- goto finish;
-
- fd = xcb_dri3_open_reply_fds(conn, reply)[0];
- fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
-
-finish:
- if (!surface)
- xcb_disconnect(conn);
- if (reply)
- free(reply);
-
- return fd;
-}
-#endif
-
-#ifdef VK_USE_PLATFORM_WAYLAND_KHR
-struct v3dv_wayland_info {
- struct wl_drm *wl_drm;
- int fd;
- bool is_set;
- bool authenticated;
-};
-
-static void
-v3dv_drm_handle_device(void *data, struct wl_drm *drm, const char *device)
-{
- struct v3dv_wayland_info *info = data;
- info->fd = open(device, O_RDWR | O_CLOEXEC);
- info->is_set = info->fd != -1;
- if (!info->is_set) {
- fprintf(stderr, "v3dv_drm_handle_device: could not open %s (%s)\n",
- device, strerror(errno));
- return;
- }
-
- drm_magic_t magic;
- if (drmGetMagic(info->fd, &magic)) {
- fprintf(stderr, "v3dv_drm_handle_device: drmGetMagic failed\n");
- close(info->fd);
- info->fd = -1;
- info->is_set = false;
- return;
- }
- wl_drm_authenticate(info->wl_drm, magic);
-}
-
-static void
-v3dv_drm_handle_format(void *data, struct wl_drm *drm, uint32_t format)
-{
-}
-
-static void
-v3dv_drm_handle_authenticated(void *data, struct wl_drm *drm)
-{
- struct v3dv_wayland_info *info = data;
- info->authenticated = true;
-}
-
-static void
-v3dv_drm_handle_capabilities(void *data, struct wl_drm *drm, uint32_t value)
-{
-}
-
-struct wl_drm_listener v3dv_drm_listener = {
- .device = v3dv_drm_handle_device,
- .format = v3dv_drm_handle_format,
- .authenticated = v3dv_drm_handle_authenticated,
- .capabilities = v3dv_drm_handle_capabilities
-};
-
-static void
-v3dv_registry_global(void *data,
- struct wl_registry *registry,
- uint32_t name,
- const char *interface,
- uint32_t version)
-{
- struct v3dv_wayland_info *info = data;
- if (strcmp(interface, wl_drm_interface.name) == 0) {
- info->wl_drm = wl_registry_bind(registry, name, &wl_drm_interface,
- MIN2(version, 2));
- wl_drm_add_listener(info->wl_drm, &v3dv_drm_listener, data);
- };
-}
-
-static void
-v3dv_registry_global_remove_cb(void *data,
- struct wl_registry *registry,
- uint32_t name)
-{
-}
-
-static int
-create_display_fd_wayland(VkIcdSurfaceBase *surface)
-{
- struct wl_display *display;
- struct wl_registry *registry = NULL;
-
- struct v3dv_wayland_info info = {
- .wl_drm = NULL,
- .fd = -1,
- .is_set = false,
- .authenticated = false
- };
-
- if (surface)
- display = ((VkIcdSurfaceWayland *) surface)->display;
- else
- display = wl_display_connect(NULL);
-
- if (!display)
- return -1;
-
- registry = wl_display_get_registry(display);
- if (!registry) {
- if (!surface)
- wl_display_disconnect(display);
- return -1;
- }
-
- static const struct wl_registry_listener registry_listener = {
- v3dv_registry_global,
- v3dv_registry_global_remove_cb
- };
- wl_registry_add_listener(registry, &registry_listener, &info);
-
- wl_display_roundtrip(display); /* For the registry advertisement */
- wl_display_roundtrip(display); /* For the DRM device event */
- wl_display_roundtrip(display); /* For the authentication event */
-
- wl_drm_destroy(info.wl_drm);
- wl_registry_destroy(registry);
-
- if (!surface)
- wl_display_disconnect(display);
-
- if (!info.is_set)
- return -1;
-
- if (!info.authenticated)
- return -1;
-
- return info.fd;
-}
-#endif
-
-/* Acquire an authenticated display fd without a surface reference. This is the
- * case where the application is making WSI allocations outside the Vulkan
- * swapchain context (only Zink, for now). Since we lack information about the
- * underlying surface we just try our best to figure out the correct display
- * and platform to use. It should work in most cases.
- */
-static void
-acquire_display_device_no_surface(struct v3dv_physical_device *pdevice)
-{
-#ifdef VK_USE_PLATFORM_WAYLAND_KHR
- pdevice->display_fd = create_display_fd_wayland(NULL);
-#endif
-
-#ifdef VK_USE_PLATFORM_XCB_KHR
- if (pdevice->display_fd == -1)
- pdevice->display_fd = create_display_fd_xcb(NULL);
-#endif
-
-#ifdef VK_USE_PLATFORM_DISPLAY_KHR
- if (pdevice->display_fd == - 1 && pdevice->master_fd >= 0)
- pdevice->display_fd = dup(pdevice->master_fd);
-#endif
-}
-
-/* Acquire an authenticated display fd from the surface. This is the regular
- * case where the application is using swapchains to create WSI allocations.
- * In this case we use the surface information to figure out the correct
- * display and platform combination.
- */
-static void
-acquire_display_device_surface(struct v3dv_physical_device *pdevice,
- VkIcdSurfaceBase *surface)
-{
- /* Mesa will set both of VK_USE_PLATFORM_{XCB,XLIB} when building with
- * platform X11, so only check for XCB and rely on XCB to get an
- * authenticated device also for Xlib.
- */
-#ifdef VK_USE_PLATFORM_XCB_KHR
- if (surface->platform == VK_ICD_WSI_PLATFORM_XCB ||
- surface->platform == VK_ICD_WSI_PLATFORM_XLIB) {
- pdevice->display_fd = create_display_fd_xcb(surface);
- }
-#endif
-
-#ifdef VK_USE_PLATFORM_WAYLAND_KHR
- if (surface->platform == VK_ICD_WSI_PLATFORM_WAYLAND)
- pdevice->display_fd = create_display_fd_wayland(surface);
-#endif
-
-#ifdef VK_USE_PLATFORM_DISPLAY_KHR
- if (surface->platform == VK_ICD_WSI_PLATFORM_DISPLAY &&
- pdevice->master_fd >= 0) {
- pdevice->display_fd = dup(pdevice->master_fd);
- }
-#endif
-}
-#endif /* !using_v3d_simulator */
-
-/* Attempts to get an authenticated display fd from the display server that
- * we can use to allocate BOs for presentable images.
- */
-VkResult
-v3dv_physical_device_acquire_display(struct v3dv_physical_device *pdevice,
- VkIcdSurfaceBase *surface)
-{
- VkResult result = VK_SUCCESS;
- mtx_lock(&pdevice->mutex);
-
- if (pdevice->display_fd != -1)
- goto done;
-
- /* When running on the simulator we do everything on a single render node so
- * we don't need to get an authenticated display fd from the display server.
- */
-#if !using_v3d_simulator
- if (surface)
- acquire_display_device_surface(pdevice, surface);
- else
- acquire_display_device_no_surface(pdevice);
-
- if (pdevice->display_fd == -1)
- result = VK_ERROR_INITIALIZATION_FAILED;
-#endif
-
-done:
- mtx_unlock(&pdevice->mutex);
- return result;
-}
-
static bool
v3d_has_feature(struct v3dv_physical_device *device, enum drm_v3d_param feature)
{
@@ -763,11 +726,11 @@ v3dv_physical_device_init_disk_cache(struct v3dv_physical_device *device)
static VkResult
create_physical_device(struct v3dv_instance *instance,
- drmDevicePtr drm_render_device,
- drmDevicePtr drm_primary_device)
+ drmDevicePtr gpu_device,
+ drmDevicePtr display_device)
{
VkResult result = VK_SUCCESS;
- int32_t master_fd = -1;
+ int32_t display_fd = -1;
int32_t render_fd = -1;
struct v3dv_physical_device *device =
@@ -783,14 +746,14 @@ create_physical_device(struct v3dv_instance *instance,
vk_physical_device_dispatch_table_from_entrypoints(
&dispatch_table, &wsi_physical_device_entrypoints, false);
- result = vk_physical_device_init(&device->vk, &instance->vk, NULL,
- &dispatch_table);
+ result = vk_physical_device_init(&device->vk, &instance->vk, NULL, NULL,
+ NULL, &dispatch_table);
if (result != VK_SUCCESS)
goto fail;
- assert(drm_render_device);
- const char *path = drm_render_device->nodes[DRM_NODE_RENDER];
+ assert(gpu_device);
+ const char *path = gpu_device->nodes[DRM_NODE_RENDER];
render_fd = open(path, O_RDWR | O_CLOEXEC);
if (render_fd < 0) {
fprintf(stderr, "Opening %s failed: %s\n", path, strerror(errno));
@@ -805,12 +768,12 @@ create_physical_device(struct v3dv_instance *instance,
const char *primary_path;
#if !using_v3d_simulator
- if (drm_primary_device)
- primary_path = drm_primary_device->nodes[DRM_NODE_PRIMARY];
+ if (display_device)
+ primary_path = display_device->nodes[DRM_NODE_PRIMARY];
else
primary_path = NULL;
#else
- primary_path = drm_render_device->nodes[DRM_NODE_PRIMARY];
+ primary_path = gpu_device->nodes[DRM_NODE_PRIMARY];
#endif
struct stat primary_stat = {0}, render_stat = {0};
@@ -837,20 +800,23 @@ create_physical_device(struct v3dv_instance *instance,
device->render_devid = render_stat.st_rdev;
#if using_v3d_simulator
- device->device_id = drm_render_device->deviceinfo.pci->device_id;
+ device->device_id = gpu_device->deviceinfo.pci->device_id;
#endif
if (instance->vk.enabled_extensions.KHR_display ||
+ instance->vk.enabled_extensions.KHR_xcb_surface ||
+ instance->vk.enabled_extensions.KHR_xlib_surface ||
+ instance->vk.enabled_extensions.KHR_wayland_surface ||
instance->vk.enabled_extensions.EXT_acquire_drm_display) {
#if !using_v3d_simulator
/* Open the primary node on the vc4 display device */
- assert(drm_primary_device);
- master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
+ assert(display_device);
+ display_fd = open(primary_path, O_RDWR | O_CLOEXEC);
#else
/* There is only one device with primary and render nodes.
* Open its primary node.
*/
- master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
+ display_fd = open(primary_path, O_RDWR | O_CLOEXEC);
#endif
}
@@ -859,8 +825,7 @@ create_physical_device(struct v3dv_instance *instance,
#endif
device->render_fd = render_fd; /* The v3d render node */
- device->display_fd = -1; /* Authenticated vc4 primary node */
- device->master_fd = master_fd; /* Master vc4 primary node */
+ device->display_fd = display_fd; /* Master vc4 primary node */
if (!v3d_get_device_info(device->render_fd, &device->devinfo, &v3dv_ioctl)) {
result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
@@ -895,8 +860,10 @@ create_physical_device(struct v3dv_instance *instance,
device->next_program_id = 0;
ASSERTED int len =
- asprintf(&device->name, "V3D %d.%d",
- device->devinfo.ver / 10, device->devinfo.ver % 10);
+ asprintf(&device->name, "V3D %d.%d.%d",
+ device->devinfo.ver / 10,
+ device->devinfo.ver % 10,
+ device->devinfo.rev);
assert(len != -1);
v3dv_physical_device_init_disk_cache(device);
@@ -928,36 +895,6 @@ create_physical_device(struct v3dv_instance *instance,
*/
device->drm_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
-#if using_v3d_simulator
- /* There are CTS tests which do the following:
- *
- * 1. Create a command buffer with a vkCmdWaitEvents()
- * 2. Submit the command buffer
- * 3. vkGetSemaphoreFdKHR() to try to get a sync_file
- * 4. vkSetEvent()
- *
- * This deadlocks in the simulator because we have to wait for the syncobj
- * to get a real fence in vkGetSemaphoreFdKHR(). This will never happen
- * though because the simulator, unlike real hardware, executes ioctls
- * synchronously in the same thread, which means that it will try to
- * execute the wait for event immediately and never get to emit the
- * signaling job that comes after the compute job that implements the wait
- * in the command buffer, which would be responsible for creating the fence
- * for the signaling semaphore.
- *
- * This behavior was seemingly allowed in previous Vulkan versions, however,
- * this was fixed in Vulkan the 1.3.228 spec. From commit 355367640f2e:
- *
- * "Clarify that vkCmdWaitEvents must not execute before a vkSetEvent it
- * waits on (internal issue 2971)"
- *
- * Either way, we disable sync file support in the simulator for now, until
- * the CTS is fixed.
- */
- device->drm_syncobj_type.import_sync_file = NULL;
- device->drm_syncobj_type.export_sync_file = NULL;
-#endif
-
/* Multiwait is required for emulated timeline semaphores and is supported
* by the v3d kernel interface.
*/
@@ -978,6 +915,7 @@ create_physical_device(struct v3dv_instance *instance,
}
get_device_extensions(device, &device->vk.supported_extensions);
+ get_features(device, &device->vk.supported_features);
mtx_init(&device->mutex, mtx_plain);
@@ -991,8 +929,8 @@ fail:
if (render_fd >= 0)
close(render_fd);
- if (master_fd >= 0)
- close(master_fd);
+ if (display_fd >= 0)
+ close(display_fd);
return result;
}
@@ -1035,14 +973,13 @@ enumerate_devices(struct vk_instance *vk_instance)
break;
}
#else
- /* On actual hardware, we should have a render node (v3d)
- * and a primary node (vc4). We will need to use the primary
- * to allocate WSI buffers and share them with the render node
- * via prime, but that is a privileged operation so we need the
- * primary node to be authenticated, and for that we need the
- * display server to provide the device fd (with DRI3), so we
- * here we only check that the device is present but we don't
- * try to open it.
+ /* On actual hardware, we should have a gpu device (v3d) and a display
+ * device (vc4). We will need to use the display device to allocate WSI
+ * buffers and share them with the render node via prime, but that is a
+ * privileged operation so we need to have an authenticated display fd,
+ * and for that we need the display server to provide it (with DRI3),
+ * so here we only check that the device is present but we don't try to
+ * open it.
*/
if (devices[i]->bustype != DRM_BUS_PLATFORM)
continue;
@@ -1050,7 +987,8 @@ enumerate_devices(struct vk_instance *vk_instance)
if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER) {
char **compat = devices[i]->deviceinfo.platform->compatible;
while (*compat) {
- if (strncmp(*compat, "brcm,2711-v3d", 13) == 0) {
+ if (strncmp(*compat, "brcm,2711-v3d", 13) == 0 ||
+ strncmp(*compat, "brcm,2712-v3d", 13) == 0) {
v3d_idx = i;
break;
}
@@ -1059,8 +997,9 @@ enumerate_devices(struct vk_instance *vk_instance)
} else if (devices[i]->available_nodes & 1 << DRM_NODE_PRIMARY) {
char **compat = devices[i]->deviceinfo.platform->compatible;
while (*compat) {
- if (strncmp(*compat, "brcm,bcm2711-vc5", 16) == 0 ||
- strncmp(*compat, "brcm,bcm2835-vc4", 16) == 0 ) {
+ if (strncmp(*compat, "brcm,bcm2712-vc6", 16) == 0 ||
+ strncmp(*compat, "brcm,bcm2711-vc5", 16) == 0 ||
+ strncmp(*compat, "brcm,bcm2835-vc4", 16) == 0) {
vc4_idx = i;
break;
}
@@ -1071,9 +1010,10 @@ enumerate_devices(struct vk_instance *vk_instance)
}
#if !using_v3d_simulator
- if (v3d_idx != -1 && vc4_idx != -1) {
- result =
- create_physical_device(instance, devices[v3d_idx], devices[vc4_idx]);
+ if (v3d_idx != -1) {
+ drmDevicePtr v3d_device = devices[v3d_idx];
+ drmDevicePtr vc4_device = vc4_idx != -1 ? devices[vc4_idx] : NULL;
+ result = create_physical_device(instance, v3d_device, vc4_device);
}
#endif
@@ -1082,238 +1022,6 @@ enumerate_devices(struct vk_instance *vk_instance)
return result;
}
-VKAPI_ATTR void VKAPI_CALL
-v3dv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceFeatures2 *pFeatures)
-{
- V3DV_FROM_HANDLE(v3dv_physical_device, physical_device, physicalDevice);
-
- struct vk_features features = {
- /* Vulkan 1.0 */
- .robustBufferAccess = true, /* This feature is mandatory */
- .fullDrawIndexUint32 = false, /* Only available since V3D 4.4.9.1 */
- .imageCubeArray = true,
- .independentBlend = true,
- .geometryShader = true,
- .tessellationShader = false,
- .sampleRateShading = true,
- .dualSrcBlend = false,
- .logicOp = true,
- .multiDrawIndirect = false,
- .drawIndirectFirstInstance = true,
- .depthClamp = false, /* Only available since V3D 4.5.1.1 */
- .depthBiasClamp = true,
- .fillModeNonSolid = true,
- .depthBounds = false, /* Only available since V3D 4.3.16.2 */
- .wideLines = true,
- .largePoints = true,
- .alphaToOne = true,
- .multiViewport = false,
- .samplerAnisotropy = true,
- .textureCompressionETC2 = true,
- .textureCompressionASTC_LDR = true,
- /* Note that textureCompressionBC requires that the driver support all
- * the BC formats. V3D 4.2 only support the BC1-3, so we can't claim
- * that we support it.
- */
- .textureCompressionBC = false,
- .occlusionQueryPrecise = true,
- .pipelineStatisticsQuery = false,
- .vertexPipelineStoresAndAtomics = true,
- .fragmentStoresAndAtomics = true,
- .shaderTessellationAndGeometryPointSize = true,
- .shaderImageGatherExtended = false,
- .shaderStorageImageExtendedFormats = true,
- .shaderStorageImageMultisample = false,
- .shaderStorageImageReadWithoutFormat = true,
- .shaderStorageImageWriteWithoutFormat = false,
- .shaderUniformBufferArrayDynamicIndexing = false,
- .shaderSampledImageArrayDynamicIndexing = false,
- .shaderStorageBufferArrayDynamicIndexing = false,
- .shaderStorageImageArrayDynamicIndexing = false,
- .shaderClipDistance = true,
- .shaderCullDistance = false,
- .shaderFloat64 = false,
- .shaderInt64 = false,
- .shaderInt16 = false,
- .shaderResourceResidency = false,
- .shaderResourceMinLod = false,
- .sparseBinding = false,
- .sparseResidencyBuffer = false,
- .sparseResidencyImage2D = false,
- .sparseResidencyImage3D = false,
- .sparseResidency2Samples = false,
- .sparseResidency4Samples = false,
- .sparseResidency8Samples = false,
- .sparseResidency16Samples = false,
- .sparseResidencyAliased = false,
- .variableMultisampleRate = false,
- .inheritedQueries = true,
-
- /* Vulkan 1.1 */
- .storageBuffer16BitAccess = true,
- .uniformAndStorageBuffer16BitAccess = true,
- .storagePushConstant16 = true,
- .storageInputOutput16 = false,
- .multiview = true,
- .multiviewGeometryShader = false,
- .multiviewTessellationShader = false,
- .variablePointersStorageBuffer = true,
- /* FIXME: this needs support for non-constant index on UBO/SSBO */
- .variablePointers = false,
- .protectedMemory = false,
-#ifdef ANDROID
- .samplerYcbcrConversion = false,
-#else
- .samplerYcbcrConversion = true,
-#endif
- .shaderDrawParameters = false,
-
- /* Vulkan 1.2 */
- .hostQueryReset = true,
- .uniformAndStorageBuffer8BitAccess = true,
- .uniformBufferStandardLayout = true,
- /* V3D 4.2 wraps TMU vector accesses to 16-byte boundaries, so loads and
- * stores of vectors that cross these boundaries would not work correcly
- * with scalarBlockLayout and would need to be split into smaller vectors
- * (and/or scalars) that don't cross these boundaries. For load/stores
- * with dynamic offsets where we can't identify if the offset is
- * problematic, we would always have to scalarize. Overall, this would
- * not lead to best performance so let's just not support it.
- */
- .scalarBlockLayout = false,
- /* This tells applications 2 things:
- *
- * 1. If they can select just one aspect for barriers. For us barriers
- * decide if we need to split a job and we don't care if it is only
- * for one of the aspects of the image or both, so we don't really
- * benefit from seeing barriers that select just one aspect.
- *
- * 2. If they can program different layouts for each aspect. We
- * generally don't care about layouts, so again, we don't get any
- * benefits from this to limit the scope of image layout transitions.
- *
- * Still, Vulkan 1.2 requires this feature to be supported so we
- * advertise it even though we don't really take advantage of it.
- */
- .separateDepthStencilLayouts = true,
- .storageBuffer8BitAccess = true,
- .storagePushConstant8 = true,
- .imagelessFramebuffer = true,
- .timelineSemaphore = true,
-
- .samplerMirrorClampToEdge = true,
-
- /* These are mandatory by Vulkan 1.2, however, we don't support any of
- * the optional features affected by them (non 32-bit types for
- * shaderSubgroupExtendedTypes and additional subgroup ballot for
- * subgroupBroadcastDynamicId), so in practice setting them to true
- * doesn't have any implications for us until we implement any of these
- * optional features.
- */
- .shaderSubgroupExtendedTypes = true,
- .subgroupBroadcastDynamicId = true,
-
- .vulkanMemoryModel = true,
- .vulkanMemoryModelDeviceScope = true,
- .vulkanMemoryModelAvailabilityVisibilityChains = true,
-
- .bufferDeviceAddress = true,
- .bufferDeviceAddressCaptureReplay = false,
- .bufferDeviceAddressMultiDevice = false,
-
- /* Vulkan 1.3 */
- .inlineUniformBlock = true,
- /* Inline buffers work like push constants, so after their are bound
- * some of their contents may be copied into the uniform stream as soon
- * as the next draw/dispatch is recorded in the command buffer. This means
- * that if the client updates the buffer contents after binding it to
- * a command buffer, the next queue submit of that command buffer may
- * not use the latest update to the buffer contents, but the data that
- * was present in the buffer at the time it was bound to the command
- * buffer.
- */
- .descriptorBindingInlineUniformBlockUpdateAfterBind = false,
- .pipelineCreationCacheControl = true,
- .privateData = true,
- .maintenance4 = true,
- .shaderZeroInitializeWorkgroupMemory = true,
- .synchronization2 = true,
- .robustImageAccess = true,
- .shaderIntegerDotProduct = true,
-
- /* VK_EXT_4444_formats */
- .formatA4R4G4B4 = true,
- .formatA4B4G4R4 = true,
-
- /* VK_EXT_custom_border_color */
- .customBorderColors = true,
- .customBorderColorWithoutFormat = false,
-
- /* VK_EXT_index_type_uint8 */
- .indexTypeUint8 = true,
-
- /* VK_EXT_line_rasterization */
- .rectangularLines = true,
- .bresenhamLines = true,
- .smoothLines = false,
- .stippledRectangularLines = false,
- .stippledBresenhamLines = false,
- .stippledSmoothLines = false,
-
- /* VK_EXT_color_write_enable */
- .colorWriteEnable = true,
-
- /* VK_KHR_pipeline_executable_properties */
- .pipelineExecutableInfo = true,
-
- /* VK_EXT_provoking_vertex */
- .provokingVertexLast = true,
- /* FIXME: update when supporting EXT_transform_feedback */
- .transformFeedbackPreservesProvokingVertex = false,
-
- /* VK_EXT_vertex_attribute_divisor */
- .vertexAttributeInstanceRateDivisor = true,
- .vertexAttributeInstanceRateZeroDivisor = false,
-
- /* VK_KHR_performance_query */
- .performanceCounterQueryPools = physical_device->caps.perfmon,
- .performanceCounterMultipleQueryPools = false,
-
- /* VK_EXT_texel_buffer_alignment */
- .texelBufferAlignment = true,
-
- /* VK_KHR_workgroup_memory_explicit_layout */
- .workgroupMemoryExplicitLayout = true,
- .workgroupMemoryExplicitLayoutScalarBlockLayout = false,
- .workgroupMemoryExplicitLayout8BitAccess = true,
- .workgroupMemoryExplicitLayout16BitAccess = true,
-
- /* VK_EXT_border_color_swizzle */
- .borderColorSwizzle = true,
- .borderColorSwizzleFromImage = true,
-
- /* VK_EXT_shader_module_identifier */
- .shaderModuleIdentifier = true,
-
- /* VK_EXT_depth_clip_control */
- .depthClipControl = true,
-
- /* VK_EXT_attachment_feedback_loop_layout */
- .attachmentFeedbackLoopLayout = true,
-
- /* VK_EXT_primitive_topology_list_restart */
- .primitiveTopologyListRestart = true,
- /* FIXME: we don't support tessellation shaders yet */
- .primitiveTopologyPatchListRestart = false,
-
- /* VK_EXT_pipeline_robustness */
- .pipelineRobustness = true,
- };
-
- vk_get_physical_device_features(pFeatures, &features);
-}
-
uint32_t
v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev)
{
@@ -1329,6 +1037,8 @@ v3dv_physical_device_device_id(struct v3dv_physical_device *dev)
switch (dev->devinfo.ver) {
case 42:
return 0xBE485FD3; /* Broadcom deviceID for 2711 */
+ case 71:
+ return 0x55701C33; /* Broadcom deviceID for 2712 */
default:
unreachable("Unsupported V3D version");
}
@@ -1357,6 +1067,8 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
const VkSampleCountFlags supported_sample_counts =
VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
+ const uint8_t max_rts = V3D_MAX_RENDER_TARGETS(pdevice->devinfo.ver);
+
struct timespec clock_res;
clock_getres(CLOCK_MONOTONIC, &clock_res);
const float timestamp_period =
@@ -1427,7 +1139,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
.maxFragmentInputComponents = max_varying_components,
.maxFragmentOutputAttachments = 4,
.maxFragmentDualSrcAttachments = 0,
- .maxFragmentCombinedOutputResources = MAX_RENDER_TARGETS +
+ .maxFragmentCombinedOutputResources = max_rts +
MAX_STORAGE_BUFFERS +
MAX_STORAGE_IMAGES,
@@ -1440,7 +1152,8 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
.subPixelPrecisionBits = V3D_COORD_SHIFT,
.subTexelPrecisionBits = 8,
.mipmapPrecisionBits = 8,
- .maxDrawIndexedIndexValue = 0x00ffffff,
+ .maxDrawIndexedIndexValue = pdevice->devinfo.ver >= 71 ?
+ 0xffffffff : 0x00ffffff,
.maxDrawIndirectCount = 0x7fffffff,
.maxSamplerLodBias = 14.0f,
.maxSamplerAnisotropy = 16.0f,
@@ -1467,7 +1180,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
.framebufferDepthSampleCounts = supported_sample_counts,
.framebufferStencilSampleCounts = supported_sample_counts,
.framebufferNoAttachmentsSampleCounts = supported_sample_counts,
- .maxColorAttachments = MAX_RENDER_TARGETS,
+ .maxColorAttachments = max_rts,
.sampledImageColorSampleCounts = supported_sample_counts,
.sampledImageIntegerSampleCounts = supported_sample_counts,
.sampledImageDepthSampleCounts = supported_sample_counts,
@@ -1579,8 +1292,8 @@ v3dv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
.driverID = VK_DRIVER_ID_MESA_V3DV,
.conformanceVersion = {
.major = 1,
- .minor = 2,
- .subminor = 7,
+ .minor = 3,
+ .subminor = 6,
.patch = 1,
},
.supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
@@ -1689,6 +1402,24 @@ v3dv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
props->allowCommandBufferQueryCopies = true;
break;
}
+#ifdef ANDROID
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wswitch"
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENTATION_PROPERTIES_ANDROID: {
+ VkPhysicalDevicePresentationPropertiesANDROID *props =
+ (VkPhysicalDevicePresentationPropertiesANDROID *)ext;
+ uint64_t front_rendering_usage = 0;
+ struct u_gralloc *gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO);
+ if (gralloc != NULL) {
+ u_gralloc_get_front_rendering_usage(gralloc, &front_rendering_usage);
+ u_gralloc_destroy(&gralloc);
+ }
+ props->sharedImage = front_rendering_usage ? VK_TRUE
+ : VK_FALSE;
+ break;
+ }
+#pragma GCC diagnostic pop
+#endif
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
VkPhysicalDeviceDrmPropertiesEXT *props =
(VkPhysicalDeviceDrmPropertiesEXT *)ext;
@@ -2001,6 +1732,11 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
return vk_error(NULL, result);
}
+#ifdef ANDROID
+ device->gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO);
+ assert(device->gralloc);
+#endif
+
device->instance = instance;
device->pdevice = physical_device;
@@ -2034,7 +1770,7 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
v3dv_pipeline_cache_init(&device->default_pipeline_cache, device, 0,
device->instance->default_pipeline_cache_enabled);
device->default_attribute_float =
- v3dv_pipeline_create_default_attribute_values(device, NULL);
+ v3dv_X(device, create_default_attribute_values)(device, NULL);
device->device_address_mem_ctx = ralloc_context(NULL);
util_dynarray_init(&device->device_address_bo_list,
@@ -2067,6 +1803,9 @@ fail:
v3dv_event_free_resources(device);
v3dv_query_free_resources(device);
vk_device_finish(&device->vk);
+#ifdef ANDROID
+ u_gralloc_destroy(&device->gralloc);
+#endif
vk_free(&device->vk.alloc, device);
return result;
@@ -2105,6 +1844,9 @@ v3dv_DestroyDevice(VkDevice _device,
mtx_destroy(&device->query_mutex);
vk_device_finish(&device->vk);
+#ifdef ANDROID
+ u_gralloc_destroy(&device->gralloc);
+#endif
vk_free2(&device->vk.alloc, pAllocator, device);
}
@@ -2244,18 +1986,8 @@ device_alloc_for_wsi(struct v3dv_device *device,
#if using_v3d_simulator
return device_alloc(device, mem, size);
#else
- /* If we are allocating for WSI we should have a swapchain and thus,
- * we should've initialized the display device. However, Zink doesn't
- * use swapchains, so in that case we can get here without acquiring the
- * display device and we need to do it now.
- */
VkResult result;
struct v3dv_physical_device *pdevice = device->pdevice;
- if (unlikely(pdevice->display_fd < 0)) {
- result = v3dv_physical_device_acquire_display(pdevice, NULL);
- if (result != VK_SUCCESS)
- return result;
- }
assert(pdevice->display_fd != -1);
mem->is_for_wsi = true;
@@ -2329,7 +2061,7 @@ free_memory(struct v3dv_device *device,
device_free(device, mem);
- vk_object_free(&device->vk, pAllocator, mem);
+ vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
}
VKAPI_ATTR void VKAPI_CALL
@@ -2354,13 +2086,10 @@ v3dv_AllocateMemory(VkDevice _device,
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
- /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
- assert(pAllocateInfo->allocationSize > 0);
-
/* We always allocate device memory in multiples of a page, so round up
* requested size to that.
*/
- const VkDeviceSize alloc_size = ALIGN(pAllocateInfo->allocationSize, 4096);
+ const VkDeviceSize alloc_size = align64(pAllocateInfo->allocationSize, 4096);
if (unlikely(alloc_size > MAX_MEMORY_ALLOCATION_SIZE))
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
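For concreteness, the rounding above behaves as follows (an editor's standalone sketch, not part of the patch, assuming align64 has the usual round-up-to-a-power-of-two-multiple semantics):

    #include <assert.h>
    #include <stdint.h>

    /* Stand-in for align64; assumes the alignment is a power of two,
     * as the 4096-byte page size is here. */
    static uint64_t align64_example(uint64_t v, uint64_t a)
    {
       return (v + a - 1) & ~(a - 1);
    }

    int main(void)
    {
       assert(align64_example(1, 4096) == 4096);    /* one full page */
       assert(align64_example(4096, 4096) == 4096); /* already page-aligned */
       assert(align64_example(4097, 4096) == 8192); /* rounds up to two pages */
       return 0;
    }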
@@ -2369,8 +2098,8 @@ v3dv_AllocateMemory(VkDevice _device,
if (unlikely(heap_used + alloc_size > pdevice->memory.memoryHeaps[0].size))
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- mem = vk_object_zalloc(&device->vk, pAllocator, sizeof(*mem),
- VK_OBJECT_TYPE_DEVICE_MEMORY);
+ mem = vk_device_memory_create(&device->vk, pAllocateInfo,
+ pAllocator, sizeof(*mem));
if (mem == NULL)
return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
@@ -2410,6 +2139,7 @@ v3dv_AllocateMemory(VkDevice _device,
}
VkResult result;
+
if (wsi_info) {
result = device_alloc_for_wsi(device, pAllocator, mem, alloc_size);
} else if (fd_info && fd_info->handleType) {
@@ -2419,12 +2149,22 @@ v3dv_AllocateMemory(VkDevice _device,
fd_info->fd, alloc_size, &mem->bo);
if (result == VK_SUCCESS)
close(fd_info->fd);
+ } else if (mem->vk.ahardware_buffer) {
+#ifdef ANDROID
+ const native_handle_t *handle = AHardwareBuffer_getNativeHandle(mem->vk.ahardware_buffer);
+ assert(handle->numFds > 0);
+ size_t size = lseek(handle->data[0], 0, SEEK_END);
+ result = device_import_bo(device, pAllocator,
+ handle->data[0], size, &mem->bo);
+#else
+ result = VK_ERROR_FEATURE_NOT_PRESENT;
+#endif
} else {
result = device_alloc(device, mem, alloc_size);
}
if (result != VK_SUCCESS) {
- vk_object_free(&device->vk, pAllocator, mem);
+ vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
return vk_error(device, result);
}
@@ -2436,7 +2176,7 @@ v3dv_AllocateMemory(VkDevice _device,
/* If this memory can be used via VK_KHR_buffer_device_address then we
* will need to manually add the BO to any job submit that makes use of
- * VK_KHR_buffer_device_address, since such jobs may produde buffer
+ * VK_KHR_buffer_device_address, since such jobs may produce buffer
* load/store operations that may access any buffer memory allocated with
* this flag and we don't have any means to tell which buffers will be
* accessed through this mechanism since they don't even have to be bound
@@ -2668,11 +2408,44 @@ v3dv_BindImageMemory2(VkDevice _device,
const VkBindImageMemoryInfo *pBindInfos)
{
for (uint32_t i = 0; i < bindInfoCount; i++) {
-#ifndef ANDROID
+#ifdef ANDROID
+ V3DV_FROM_HANDLE(v3dv_device_memory, mem, pBindInfos[i].memory);
+ V3DV_FROM_HANDLE(v3dv_device, device, _device);
+ if (mem != NULL && mem->vk.ahardware_buffer) {
+ AHardwareBuffer_Desc description;
+ const native_handle_t *handle = AHardwareBuffer_getNativeHandle(mem->vk.ahardware_buffer);
+
+ V3DV_FROM_HANDLE(v3dv_image, image, pBindInfos[i].image);
+ AHardwareBuffer_describe(mem->vk.ahardware_buffer, &description);
+
+ struct u_gralloc_buffer_handle gr_handle = {
+ .handle = handle,
+ .pixel_stride = description.stride,
+ .hal_format = description.format,
+ };
+
+ VkResult result = v3dv_gralloc_to_drm_explicit_layout(
+ device->gralloc,
+ &gr_handle,
+ image->android_explicit_layout,
+ image->android_plane_layouts,
+ V3DV_MAX_PLANE_COUNT);
+ if (result != VK_SUCCESS)
+ return result;
+
+ result = v3dv_update_image_layout(
+ device, image, image->android_explicit_layout->drmFormatModifier,
+ /* disjoint = */ false, image->android_explicit_layout);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+#endif
+
const VkBindImageMemorySwapchainInfoKHR *swapchain_info =
vk_find_struct_const(pBindInfos->pNext,
BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR);
if (swapchain_info && swapchain_info->swapchain) {
+#ifndef ANDROID
struct v3dv_image *swapchain_image =
v3dv_wsi_get_image_from_swapchain(swapchain_info->swapchain,
swapchain_info->imageIndex);
@@ -2685,8 +2458,8 @@ v3dv_BindImageMemory2(VkDevice _device,
.memoryOffset = swapchain_image->planes[0].mem_offset,
};
bind_image_memory(&swapchain_bind);
- } else
#endif
+ } else
{
bind_image_memory(&pBindInfos[i]);
}
@@ -2716,6 +2489,18 @@ get_buffer_memory_requirements(struct v3dv_buffer *buffer,
.size = align64(buffer->size, buffer->alignment),
};
+ /* UBO and SSBO may be read using ldunifa, which prefetches the next
+ * 4 bytes after a read. If the buffer's size is exactly a multiple
+ * of a page size and the shader reads the last 4 bytes with ldunifa
+ * the prefetching would read out of bounds and cause an MMU error,
+ * so we allocate extra space to avoid kernel error spamming.
+ */
+ bool can_ldunifa = buffer->usage &
+ (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
+ if (can_ldunifa && (buffer->size % 4096 == 0))
+ pMemoryRequirements->memoryRequirements.size += buffer->alignment;
+
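A minimal sketch of the padding rule above (an editor's illustration with hypothetical size and alignment values; pad_for_ldunifa is not a real v3dv helper): only UBO/SSBO-capable buffers whose size is an exact page multiple get the extra bytes, since those are the only ones where the 4-byte prefetch can step past the last mapped page.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical helper mirroring the check above: pad buffers that can
     * be read via ldunifa and whose size is a multiple of the 4096-byte
     * page size. */
    static uint64_t pad_for_ldunifa(uint64_t size, uint64_t alignment,
                                    bool can_ldunifa)
    {
       return (can_ldunifa && size % 4096 == 0) ? size + alignment : size;
    }

    int main(void)
    {
       assert(pad_for_ldunifa(8192, 256, true) == 8448);  /* page multiple: padded */
       assert(pad_for_ldunifa(8000, 256, true) == 8000);  /* prefetch stays in bounds */
       assert(pad_for_ldunifa(8192, 256, false) == 8192); /* not a UBO/SSBO */
       return 0;
    }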
vk_foreach_struct(ext, pMemoryRequirements->pNext) {
switch (ext->sType) {
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
@@ -2978,7 +2763,7 @@ v3dv_CreateSampler(VkDevice _device,
}
}
- v3dv_X(device, pack_sampler_state)(sampler, pCreateInfo, bc_info);
+ v3dv_X(device, pack_sampler_state)(device, sampler, pCreateInfo, bc_info);
*pSampler = v3dv_sampler_to_handle(sampler);
@@ -3079,9 +2864,9 @@ vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
*
* - Loader interface v4 differs from v3 in:
* - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
- *
+ *
* - Loader interface v5 differs from v4 in:
- * - The ICD must support Vulkan API version 1.1 and must not return
+ * - The ICD must support Vulkan API version 1.1 and must not return
* VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a
* Vulkan Loader with interface v4 or smaller is being used and the
* application provides an API version that is greater than 1.0.
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_event.c b/lib/mesa/src/broadcom/vulkan/v3dv_event.c
index 966392400..a3aad37d9 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_event.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_event.c
@@ -33,20 +33,16 @@ get_set_event_cs()
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
"set event cs");
- b.shader->info.workgroup_size[0] = 1;
- b.shader->info.workgroup_size[1] = 1;
- b.shader->info.workgroup_size[2] = 1;
-
- nir_ssa_def *buf =
+ nir_def *buf =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 0,
.binding = 0,
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
- nir_ssa_def *offset =
+ nir_def *offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
- nir_ssa_def *value =
+ nir_def *value =
nir_load_push_constant(&b, 1, 8, nir_imm_int(&b, 0), .base = 4, .range = 4);
nir_store_ssbo(&b, value, buf, offset,
@@ -62,23 +58,19 @@ get_wait_event_cs()
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
"wait event cs");
- b.shader->info.workgroup_size[0] = 1;
- b.shader->info.workgroup_size[1] = 1;
- b.shader->info.workgroup_size[2] = 1;
-
- nir_ssa_def *buf =
+ nir_def *buf =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 0,
.binding = 0,
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
- nir_ssa_def *offset =
+ nir_def *offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
nir_loop *loop = nir_push_loop(&b);
- nir_ssa_def *load =
+ nir_def *load =
nir_load_ssbo(&b, 1, 8, buf, offset, .access = 0, .align_mul = 4);
- nir_ssa_def *value = nir_i2i32(&b, load);
+ nir_def *value = nir_i2i32(&b, load);
nir_if *if_stmt = nir_push_if(&b, nir_ieq_imm(&b, value, 1));
nir_jump(&b, nir_jump_break);
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_formats.c b/lib/mesa/src/broadcom/vulkan/v3dv_formats.c
index ecb369963..01be6dcf4 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_formats.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_formats.c
@@ -22,13 +22,18 @@
*/
#include "v3dv_private.h"
-#include "vk_util.h"
+#ifdef ANDROID
+#include "vk_android.h"
+#endif
#include "vk_enum_defines.h"
+#include "vk_util.h"
#include "drm-uapi/drm_fourcc.h"
#include "util/format/u_format.h"
#include "vulkan/wsi/wsi_common.h"
+#include <vulkan/vulkan_android.h>
+
const uint8_t *
v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f, uint8_t plane)
{
@@ -169,6 +174,7 @@ image_format_plane_features(struct v3dv_physical_device *pdevice,
if (desc->nr_channels == 1 && vk_format_is_int(vk_format))
flags |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_ATOMIC_BIT;
} else if (vk_format == VK_FORMAT_A2B10G10R10_UNORM_PACK32 ||
+ vk_format == VK_FORMAT_A2R10G10B10_UNORM_PACK32 ||
vk_format == VK_FORMAT_A2B10G10R10_UINT_PACK32 ||
vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
/* To comply with shaderStorageImageExtendedFormats */
@@ -291,7 +297,8 @@ buffer_format_features(VkFormat vk_format, const struct v3dv_format *v3dv_format
VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT |
VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT_KHR;
}
- } else if (vk_format == VK_FORMAT_A2B10G10R10_UNORM_PACK32) {
+ } else if (vk_format == VK_FORMAT_A2B10G10R10_UNORM_PACK32 ||
+ vk_format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) {
flags |= VK_FORMAT_FEATURE_2_VERTEX_BUFFER_BIT |
VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT |
VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT;
@@ -658,6 +665,7 @@ v3dv_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice,
const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL;
const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *drm_format_mod_info = NULL;
VkExternalImageFormatProperties *external_props = NULL;
+ UNUSED VkAndroidHardwareBufferUsageANDROID *android_usage = NULL;
VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = NULL;
VkImageTiling tiling = base_info->tiling;
@@ -698,6 +706,9 @@ v3dv_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice,
case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES:
external_props = (void *) s;
break;
+ case VK_STRUCTURE_TYPE_ANDROID_HARDWARE_BUFFER_USAGE_ANDROID:
+ android_usage = (void *)s;
+ break;
case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES:
ycbcr_props = (void *) s;
break;
@@ -721,12 +732,28 @@ v3dv_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice,
if (external_props)
external_props->externalMemoryProperties = prime_fd_props;
break;
+#ifdef ANDROID
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID:
+ if (external_props) {
+ external_props->externalMemoryProperties.exportFromImportedHandleTypes = 0;
+ external_props->externalMemoryProperties.compatibleHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID;
+ external_props->externalMemoryProperties.externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT | VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+ }
+ break;
+#endif
default:
result = VK_ERROR_FORMAT_NOT_SUPPORTED;
break;
}
}
+ if (android_usage) {
+#ifdef ANDROID
+ android_usage->androidHardwareBufferUsage =
+ vk_image_usage_to_ahb_usage(base_info->flags, base_info->usage);
+#endif
+ }
+
done:
return result;
}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_image.c b/lib/mesa/src/broadcom/vulkan/v3dv_image.c
index 325cc7ce3..c02516960 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_image.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_image.c
@@ -28,6 +28,9 @@
#include "util/u_math.h"
#include "vk_util.h"
#include "vulkan/wsi/wsi_common.h"
+#ifdef ANDROID
+#include "vk_android.h"
+#endif
/**
* Computes the HW's UIFblock padding for a given height/cpp.
@@ -70,27 +73,38 @@ v3d_get_ub_pad(uint32_t cpp, uint32_t height)
return 0;
}
-static void
+/**
+ * Computes the dimension with required padding for mip levels.
+ *
+ * This padding is required for width and height dimensions when the mip
+ * level is greater than 1, and for the depth dimension when the mip level
+ * is greater than 0. This function expects to be passed a mip level >= 1.
+ *
+ * Note: Hardware documentation seems to suggest that the third argument
+ * should be the utile dimensions, but through testing it was found that
+ * the block dimension should be used instead.
+ */
+static uint32_t
+v3d_get_dimension_mpad(uint32_t dimension, uint32_t level, uint32_t block_dimension)
+{
+ assert(level >= 1);
+ uint32_t pot_dim = u_minify(dimension, 1);
+ pot_dim = util_next_power_of_two(DIV_ROUND_UP(pot_dim, block_dimension));
+ uint32_t padded_dim = block_dimension * pot_dim;
+ return u_minify(padded_dim, level - 1);
+}
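To make the level-1 padding concrete (an editor's sketch with local stand-ins for u_minify, DIV_ROUND_UP and util_next_power_of_two): a level-0 dimension of 9 minifies to 4 at level 1, and 4 is already a power of two, so the padded value is 4 rather than the 8 that halving util_next_power_of_two(9) would give.

    #include <assert.h>
    #include <stdint.h>

    /* Local stand-in for u_minify: right-shift, clamped to 1. */
    static uint32_t minify(uint32_t d, uint32_t l)
    {
       uint32_t m = d >> l;
       return m ? m : 1;
    }

    static uint32_t next_pow2(uint32_t v)
    {
       uint32_t p = 1;
       while (p < v)
          p <<= 1;
       return p;
    }

    /* Re-statement of v3d_get_dimension_mpad for illustration only. */
    static uint32_t dimension_mpad(uint32_t dim, uint32_t level, uint32_t block)
    {
       uint32_t pot = next_pow2((minify(dim, 1) + block - 1) / block);
       return minify(block * pot, level - 1);
    }

    int main(void)
    {
       /* Level-0 width 9: the level-1 power-of-two padded value is 4, not 8. */
       assert(dimension_mpad(9, 1, 1) == 4);
       /* With a block width of 4 the padding is computed in block units. */
       assert(dimension_mpad(9, 1, 4) == 4);
       return 0;
    }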
+
+static bool
v3d_setup_plane_slices(struct v3dv_image *image, uint8_t plane,
- uint32_t plane_offset)
+ uint32_t plane_offset,
+ const VkSubresourceLayout *plane_layouts)
{
assert(image->planes[plane].cpp > 0);
- /* Texture Base Adress needs to be 64-byte aligned */
- assert(plane_offset % 64 == 0);
uint32_t width = image->planes[plane].width;
uint32_t height = image->planes[plane].height;
uint32_t depth = image->vk.extent.depth;
- /* Note that power-of-two padding is based on level 1. These are not
- * equivalent to just util_next_power_of_two(dimension), because at a
- * level 0 dimension of 9, the level 1 power-of-two padded value is 4,
- * not 8.
- */
- uint32_t pot_width = 2 * util_next_power_of_two(u_minify(width, 1));
- uint32_t pot_height = 2 * util_next_power_of_two(u_minify(height, 1));
- uint32_t pot_depth = 2 * util_next_power_of_two(u_minify(depth, 1));
-
uint32_t utile_w = v3d_utile_width(image->planes[plane].cpp);
uint32_t utile_h = v3d_utile_height(image->planes[plane].cpp);
uint32_t uif_block_w = utile_w * 2;
@@ -99,6 +113,21 @@ v3d_setup_plane_slices(struct v3dv_image *image, uint8_t plane,
uint32_t block_width = vk_format_get_blockwidth(image->vk.format);
uint32_t block_height = vk_format_get_blockheight(image->vk.format);
+ /* Note that power-of-two padding is based on level 1. These are not
+ * equivalent to just util_next_power_of_two(dimension), because at a
+ * level 0 dimension of 9, the level 1 power-of-two padded value is 4,
+ * not 8. Additionally the pot padding is based on the block size.
+ */
+ uint32_t pot_width = 2 * v3d_get_dimension_mpad(width,
+ 1,
+ block_width);
+ uint32_t pot_height = 2 * v3d_get_dimension_mpad(height,
+ 1,
+ block_height);
+ uint32_t pot_depth = 2 * v3d_get_dimension_mpad(depth,
+ 1,
+ 1);
+
assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT ||
image->vk.samples == VK_SAMPLE_COUNT_4_BIT);
bool msaa = image->vk.samples != VK_SAMPLE_COUNT_1_BIT;
@@ -109,14 +138,30 @@ v3d_setup_plane_slices(struct v3dv_image *image, uint8_t plane,
assert(depth > 0);
assert(image->vk.mip_levels >= 1);
- uint32_t offset = plane_offset;
+ /* Texture Base Address needs to be 64-byte aligned. If we have an explicit
+ * plane layout we will return false to fail image creation with an
+ * appropriate error code.
+ */
+ uint32_t offset;
+ if (plane_layouts) {
+ offset = plane_layouts[plane].offset;
+ if (offset % 64 != 0)
+ return false;
+ } else {
+ offset = plane_offset;
+ }
+ assert(plane_offset % 64 == 0);
+
for (int32_t i = image->vk.mip_levels - 1; i >= 0; i--) {
struct v3d_resource_slice *slice = &image->planes[plane].slices[i];
+ slice->width = u_minify(width, i);
+ slice->height = u_minify(height, i);
+
uint32_t level_width, level_height, level_depth;
if (i < 2) {
- level_width = u_minify(width, i);
- level_height = u_minify(height, i);
+ level_width = slice->width;
+ level_height = slice->height;
} else {
level_width = u_minify(pot_width, i);
level_height = u_minify(pot_height, i);
@@ -179,6 +224,18 @@ v3d_setup_plane_slices(struct v3dv_image *image, uint8_t plane,
slice->offset = offset;
slice->stride = level_width * image->planes[plane].cpp;
+
+ /* We assume that rowPitch in the plane layout refers to level 0 */
+ if (plane_layouts && i == 0) {
+ if (plane_layouts[plane].rowPitch < slice->stride)
+ return false;
+ if (plane_layouts[plane].rowPitch % image->planes[plane].cpp)
+ return false;
+ if (image->tiled && (plane_layouts[plane].rowPitch % (4 * uif_block_w)))
+ return false;
+ slice->stride = plane_layouts[plane].rowPitch;
+ }
+
slice->padded_height = level_height;
if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
slice->tiling == V3D_TILING_UIF_XOR) {
@@ -222,7 +279,8 @@ v3d_setup_plane_slices(struct v3dv_image *image, uint8_t plane,
image->planes[plane].alignment = 4096;
} else {
image->planes[plane].alignment =
- (image->vk.usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) ? 64 : image->planes[plane].cpp;
+ (image->vk.usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) ?
+ 64 : image->planes[plane].cpp;
}
uint32_t align_offset =
@@ -243,15 +301,36 @@ v3d_setup_plane_slices(struct v3dv_image *image, uint8_t plane,
image->planes[plane].cube_map_stride =
align(image->planes[plane].slices[0].offset +
image->planes[plane].slices[0].size, 64);
+
+ if (plane_layouts && image->vk.array_layers > 1) {
+ if (plane_layouts[plane].arrayPitch % 64 != 0)
+ return false;
+ if (plane_layouts[plane].arrayPitch <
+ image->planes[plane].cube_map_stride) {
+ return false;
+ }
+ image->planes[plane].cube_map_stride = plane_layouts[plane].arrayPitch;
+ }
+
image->planes[plane].size += image->planes[plane].cube_map_stride *
(image->vk.array_layers - 1);
} else {
image->planes[plane].cube_map_stride = image->planes[plane].slices[0].size;
+ if (plane_layouts) {
+ /* We assume that depthPitch in the plane layout refers to level 0 */
+ if (plane_layouts[plane].depthPitch !=
+ image->planes[plane].slices[0].size) {
+ return false;
+ }
+ }
}
+
+ return true;
}
-static void
-v3d_setup_slices(struct v3dv_image *image, bool disjoint)
+static bool
+v3d_setup_slices(struct v3dv_image *image, bool disjoint,
+ const VkSubresourceLayout *plane_layouts)
{
if (disjoint && image->plane_count == 1)
disjoint = false;
@@ -259,11 +338,15 @@ v3d_setup_slices(struct v3dv_image *image, bool disjoint)
uint32_t offset = 0;
for (uint8_t plane = 0; plane < image->plane_count; plane++) {
offset = disjoint ? 0 : offset;
- v3d_setup_plane_slices(image, plane, offset);
+ if (!v3d_setup_plane_slices(image, plane, offset, plane_layouts)) {
+ assert(plane_layouts);
+ return false;
+ }
offset += align(image->planes[plane].size, 64);
}
image->non_disjoint_size = disjoint ? 0 : offset;
+ return true;
}
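A small sketch of how the per-plane offsets accumulate in the loop above (an editor's illustration; the plane sizes are hypothetical): non-disjoint planes are packed back to back with 64-byte alignment, while disjoint planes each start at offset 0 within their own memory binding.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static uint32_t align_u32(uint32_t v, uint32_t a)
    {
       return (v + a - 1) & ~(a - 1);
    }

    int main(void)
    {
       const uint32_t plane_size[2] = { 4000, 2048 }; /* hypothetical */
       bool disjoint = false;

       uint32_t offset = 0, plane_offset[2];
       for (int p = 0; p < 2; p++) {
          offset = disjoint ? 0 : offset;
          plane_offset[p] = offset;
          offset += align_u32(plane_size[p], 64);
       }

       assert(plane_offset[0] == 0);
       assert(plane_offset[1] == 4032); /* align(4000, 64) */
       uint32_t non_disjoint_size = disjoint ? 0 : offset;
       assert(non_disjoint_size == 4032 + 2048);
       return 0;
    }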
uint32_t
@@ -280,6 +363,34 @@ v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer
}
VkResult
+v3dv_update_image_layout(struct v3dv_device *device,
+ struct v3dv_image *image,
+ uint64_t modifier,
+ bool disjoint,
+ const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod_info)
+{
+ assert(!explicit_mod_info ||
+ image->plane_count == explicit_mod_info->drmFormatModifierPlaneCount);
+
+ assert(!explicit_mod_info ||
+ modifier == explicit_mod_info->drmFormatModifier);
+
+ image->tiled = modifier != DRM_FORMAT_MOD_LINEAR;
+
+ image->vk.drm_format_mod = modifier;
+
+ bool ok =
+ v3d_setup_slices(image, disjoint,
+ explicit_mod_info ? explicit_mod_info->pPlaneLayouts : NULL);
+ if (!ok) {
+ assert(explicit_mod_info);
+ return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
+ }
+
+ return VK_SUCCESS;
+}
+
+VkResult
v3dv_image_init(struct v3dv_device *device,
const VkImageCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -296,11 +407,20 @@ v3dv_image_init(struct v3dv_device *device,
*/
VkImageTiling tiling = pCreateInfo->tiling;
uint64_t modifier = DRM_FORMAT_MOD_INVALID;
+ const VkImageDrmFormatModifierListCreateInfoEXT *mod_info = NULL;
+ const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod_info = NULL;
+#ifdef ANDROID
+ if (image->is_native_buffer_memory) {
+ assert(image->android_explicit_layout);
+ explicit_mod_info = image->android_explicit_layout;
+ modifier = explicit_mod_info->drmFormatModifier;
+ }
+#endif
if (tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
- const VkImageDrmFormatModifierListCreateInfoEXT *mod_info =
+ mod_info =
vk_find_struct_const(pCreateInfo->pNext,
IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
- const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod_info =
+ explicit_mod_info =
vk_find_struct_const(pCreateInfo->pNext,
IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
assert(mod_info || explicit_mod_info);
@@ -327,35 +447,20 @@ v3dv_image_init(struct v3dv_device *device,
tiling = VK_IMAGE_TILING_LINEAR;
}
-#ifdef ANDROID
- const VkNativeBufferANDROID *native_buffer =
- vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
-
- int native_buf_fd = -1;
- int native_buf_stride = 0;
- int native_buf_size = 0;
-
- if (native_buffer != NULL) {
- VkResult result = v3dv_gralloc_info(device, native_buffer, &native_buf_fd,
- &native_buf_stride, &native_buf_size,
- &modifier);
- if (result != VK_SUCCESS)
- return result;
-
- if (modifier != DRM_FORMAT_MOD_BROADCOM_UIF)
- tiling = VK_IMAGE_TILING_LINEAR;
- }
-#endif
+ if (modifier == DRM_FORMAT_MOD_INVALID)
+ modifier = (tiling == VK_IMAGE_TILING_OPTIMAL) ? DRM_FORMAT_MOD_BROADCOM_UIF
+ : DRM_FORMAT_MOD_LINEAR;
const struct v3dv_format *format =
- v3dv_X(device, get_format)(pCreateInfo->format);
+ v3dv_X(device, get_format)(image->vk.format);
v3dv_assert(format != NULL && format->plane_count);
assert(pCreateInfo->samples == VK_SAMPLE_COUNT_1_BIT ||
pCreateInfo->samples == VK_SAMPLE_COUNT_4_BIT);
image->format = format;
- image->plane_count = vk_format_get_plane_count(pCreateInfo->format);
+
+ image->plane_count = vk_format_get_plane_count(image->vk.format);
const struct vk_format_ycbcr_info *ycbcr_info =
vk_format_get_ycbcr_info(image->vk.format);
@@ -378,12 +483,6 @@ v3dv_image_init(struct v3dv_device *device,
ycbcr_info->planes[plane].denominator_scales[1];
}
}
- image->tiled = tiling == VK_IMAGE_TILING_OPTIMAL ||
- (tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT &&
- modifier != DRM_FORMAT_MOD_LINEAR);
-
- image->vk.tiling = tiling;
- image->vk.drm_format_mod = modifier;
/* Our meta paths can create image views with compatible formats for any
* image, so always set this flag to keep the common Vulkan image code
@@ -391,26 +490,18 @@ v3dv_image_init(struct v3dv_device *device,
*/
image->vk.create_flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
- bool disjoint = image->vk.create_flags & VK_IMAGE_CREATE_DISJOINT_BIT;
- v3d_setup_slices(image, disjoint);
-
#ifdef ANDROID
- if (native_buffer != NULL) {
- assert(image->plane_count == 1);
- image->planes[0].slices[0].stride = native_buf_stride;
- image->non_disjoint_size =
- image->planes[0].slices[0].size =
- image->planes[0].size = native_buf_size;
-
- VkResult result = v3dv_import_native_buffer_fd(v3dv_device_to_handle(device),
- native_buf_fd, pAllocator,
- v3dv_image_to_handle(image));
- if (result != VK_SUCCESS)
- return result;
- }
+ /* At this time, an AHB handle is not yet provided.
+ * The image layout will be filled in during vkBindImageMemory2.
+ */
+ if (image->is_ahb)
+ return VK_SUCCESS;
#endif
- return VK_SUCCESS;
+ bool disjoint = image->vk.create_flags & VK_IMAGE_CREATE_DISJOINT_BIT;
+
+ return v3dv_update_image_layout(device, image, modifier, disjoint,
+ explicit_mod_info);
}
static VkResult
@@ -419,21 +510,92 @@ create_image(struct v3dv_device *device,
const VkAllocationCallbacks *pAllocator,
VkImage *pImage)
{
+ VkResult result;
struct v3dv_image *image = NULL;
image = vk_image_create(&device->vk, pCreateInfo, pAllocator, sizeof(*image));
if (image == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
- VkResult result = v3dv_image_init(device, pCreateInfo, pAllocator, image);
- if (result != VK_SUCCESS) {
- vk_image_destroy(&device->vk, pAllocator, &image->vk);
- return result;
+#ifdef ANDROID
+ const VkExternalMemoryImageCreateInfo *external_info =
+ vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);
+
+ const VkNativeBufferANDROID *native_buffer =
+ vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
+
+ if (native_buffer != NULL)
+ image->is_native_buffer_memory = true;
+
+ image->is_ahb = external_info && (external_info->handleTypes &
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
+
+ assert(!(image->is_ahb && image->is_native_buffer_memory));
+
+ if (image->is_ahb || image->is_native_buffer_memory) {
+ image->android_explicit_layout = vk_alloc2(&device->vk.alloc, pAllocator,
+ sizeof(VkImageDrmFormatModifierExplicitCreateInfoEXT),
+ 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!image->android_explicit_layout) {
+ result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ goto fail;
+ }
+
+ image->android_plane_layouts = vk_alloc2(&device->vk.alloc, pAllocator,
+ sizeof(VkSubresourceLayout) * V3DV_MAX_PLANE_COUNT,
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!image->android_plane_layouts) {
+ result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ goto fail;
+ }
+ }
+
+ if (image->is_native_buffer_memory) {
+ struct u_gralloc_buffer_handle gr_handle = {
+ .handle = native_buffer->handle,
+ .hal_format = native_buffer->format,
+ .pixel_stride = native_buffer->stride,
+ };
+
+ result = v3dv_gralloc_to_drm_explicit_layout(device->gralloc,
+ &gr_handle,
+ image->android_explicit_layout,
+ image->android_plane_layouts,
+ V3DV_MAX_PLANE_COUNT);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+#endif
+
+ result = v3dv_image_init(device, pCreateInfo, pAllocator, image);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+#ifdef ANDROID
+ if (image->is_native_buffer_memory) {
+ result = v3dv_import_native_buffer_fd(v3dv_device_to_handle(device),
+ native_buffer->handle->data[0], pAllocator,
+ v3dv_image_to_handle(image));
+ if (result != VK_SUCCESS)
+ goto fail;
}
+#endif
*pImage = v3dv_image_to_handle(image);
return VK_SUCCESS;
+
+fail:
+#ifdef ANDROID
+ if (image->android_explicit_layout)
+ vk_free2(&device->vk.alloc, pAllocator, image->android_explicit_layout);
+ if (image->android_plane_layouts)
+ vk_free2(&device->vk.alloc, pAllocator, image->android_plane_layouts);
+#endif
+
+ vk_image_destroy(&device->vk, pAllocator, &image->vk);
+ return result;
}
static VkResult
@@ -534,8 +696,10 @@ v3dv_GetImageSubresourceLayout(VkDevice device,
v3dv_layer_offset(image, subresource->mipLevel, subresource->arrayLayer,
plane) - image->planes[plane].mem_offset;
layout->rowPitch = slice->stride;
- layout->depthPitch = image->planes[plane].cube_map_stride;
- layout->arrayPitch = image->planes[plane].cube_map_stride;
+ layout->depthPitch = image->vk.image_type == VK_IMAGE_TYPE_3D ?
+ image->planes[plane].cube_map_stride : 0;
+ layout->arrayPitch = image->vk.array_layers > 1 ?
+ image->planes[plane].cube_map_stride : 0;
if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
layout->size = slice->size;
@@ -567,12 +731,33 @@ v3dv_DestroyImage(VkDevice _device,
if (image == NULL)
return;
+ /* If we have created a shadow tiled image for this image we must also free
+ * it (along with its memory allocation).
+ */
+ if (image->shadow) {
+ bool disjoint = image->vk.create_flags & VK_IMAGE_CREATE_DISJOINT_BIT;
+ for (int i = 0; i < (disjoint ? image->plane_count : 1); i++) {
+ if (image->shadow->planes[i].mem) {
+ v3dv_FreeMemory(_device,
+ v3dv_device_memory_to_handle(image->shadow->planes[i].mem),
+ pAllocator);
+ }
+ }
+ v3dv_DestroyImage(_device, v3dv_image_to_handle(image->shadow),
+ pAllocator);
+ image->shadow = NULL;
+ }
+
#ifdef ANDROID
- assert(image->plane_count == 1);
if (image->is_native_buffer_memory)
v3dv_FreeMemory(_device,
v3dv_device_memory_to_handle(image->planes[0].mem),
pAllocator);
+
+ if (image->android_explicit_layout)
+ vk_free2(&device->vk.alloc, pAllocator, image->android_explicit_layout);
+ if (image->android_plane_layouts)
+ vk_free2(&device->vk.alloc, pAllocator, image->android_plane_layouts);
#endif
vk_image_destroy(&device->vk, pAllocator, &image->vk);
@@ -641,8 +826,7 @@ create_image_view(struct v3dv_device *device,
* makes sense to implement swizzle composition using VkSwizzle directly.
*/
VkFormat format;
- uint8_t image_view_swizzle[4];
- if (pCreateInfo->format == VK_FORMAT_D24_UNORM_S8_UINT &&
+ if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
range->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
format = VK_FORMAT_R8G8B8A8_UINT;
uint8_t stencil_aspect_swizzle[4] = {
@@ -652,11 +836,11 @@ create_image_view(struct v3dv_device *device,
vk_component_mapping_to_pipe_swizzle(iview->vk.swizzle, view_swizzle);
util_format_compose_swizzles(stencil_aspect_swizzle, view_swizzle,
- image_view_swizzle);
+ iview->view_swizzle);
} else {
- format = pCreateInfo->format;
+ format = iview->vk.format;
vk_component_mapping_to_pipe_swizzle(iview->vk.swizzle,
- image_view_swizzle);
+ iview->view_swizzle);
}
iview->vk.view_format = format;
@@ -681,7 +865,7 @@ create_image_view(struct v3dv_device *device,
const uint8_t *format_swizzle =
v3dv_get_format_swizzle(device, format, plane);
- util_format_compose_swizzles(format_swizzle, image_view_swizzle,
+ util_format_compose_swizzles(format_swizzle, iview->view_swizzle,
iview->planes[plane].swizzle);
iview->planes[plane].swap_rb = v3dv_format_swizzle_needs_rb_swap(format_swizzle);
@@ -725,6 +909,13 @@ v3dv_DestroyImageView(VkDevice _device,
if (image_view == NULL)
return;
+ if (image_view->shadow) {
+ v3dv_DestroyImageView(_device,
+ v3dv_image_view_to_handle(image_view->shadow),
+ pAllocator);
+ image_view->shadow = NULL;
+ }
+
vk_image_view_destroy(&device->vk, pAllocator, &image_view->vk);
}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_limits.h b/lib/mesa/src/broadcom/vulkan/v3dv_limits.h
index 9cda9f0d6..4df172e6b 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_limits.h
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_limits.h
@@ -41,7 +41,7 @@
#define MAX_STORAGE_IMAGES 4
#define MAX_INPUT_ATTACHMENTS 4
-#define MAX_UNIFORM_BUFFERS 12
+#define MAX_UNIFORM_BUFFERS 16
#define MAX_INLINE_UNIFORM_BUFFERS 4
#define MAX_STORAGE_BUFFERS 8
@@ -50,8 +50,6 @@
#define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + \
MAX_DYNAMIC_STORAGE_BUFFERS)
-#define MAX_RENDER_TARGETS 4
-
#define MAX_MULTIVIEW_VIEW_COUNT 16
/* These are tunable parameters in the HW design, but all the V3D
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_meta_clear.c b/lib/mesa/src/broadcom/vulkan/v3dv_meta_clear.c
index 9d7e36928..8eeb03e57 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_meta_clear.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_meta_clear.c
@@ -73,7 +73,7 @@ clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
* conversion"
*/
assert(image->plane_count == 1);
- if (!v3dv_meta_can_use_tlb(image, 0, &origin, &fb_format))
+ if (!v3dv_meta_can_use_tlb(image, 0, 0, &origin, NULL, &fb_format))
return false;
uint32_t internal_type, internal_bpp;
@@ -127,6 +127,7 @@ clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
v3dv_job_start_frame(job, width, height, max_layer,
false, true, 1, internal_bpp,
+ 4 * v3d_internal_bpp_words(internal_bpp),
image->vk.samples > VK_SAMPLE_COUNT_1_BIT);
struct v3dv_meta_framebuffer framebuffer;
@@ -329,7 +330,7 @@ get_clear_rect_vs()
nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
vs_out_pos->data.location = VARYING_SLOT_POS;
- nir_ssa_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
+ nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
nir_store_var(&b, vs_out_pos, pos, 0xf);
return b.shader;
@@ -352,8 +353,8 @@ get_clear_rect_gs(uint32_t push_constant_layer_base)
nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
(1ull << VARYING_SLOT_LAYER);
- nir->info.gs.input_primitive = SHADER_PRIM_TRIANGLES;
- nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
+ nir->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
+ nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
nir->info.gs.vertices_in = 3;
nir->info.gs.vertices_out = 3;
nir->info.gs.invocations = 1;
@@ -386,7 +387,7 @@ get_clear_rect_gs(uint32_t push_constant_layer_base)
nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
/* gl_Layer from push constants */
- nir_ssa_def *layer =
+ nir_def *layer =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
.base = push_constant_layer_base, .range = 4);
nir_store_var(&b, gs_out_layer, layer, 0x1);
@@ -414,7 +415,7 @@ get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)
nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;
- nir_ssa_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
+ nir_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
nir_store_var(&b, fs_out_color, color_load, 0xf);
return b.shader;
@@ -432,7 +433,7 @@ get_depth_clear_rect_fs()
"out_depth");
fs_out_depth->data.location = FRAG_RESULT_DEPTH;
- nir_ssa_def *depth_load =
+ nir_def *depth_load =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
nir_store_var(&b, fs_out_depth, depth_load, 0x1);
@@ -747,7 +748,7 @@ get_color_clear_pipeline_cache_key(uint32_t rt_idx,
uint32_t bit_offset = 0;
key |= rt_idx;
- bit_offset += 2;
+ bit_offset += 3;
key |= ((uint64_t) format) << bit_offset;
bit_offset += 32;
@@ -1189,9 +1190,11 @@ v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- /* We can only clear attachments in the current subpass */
- assert(attachmentCount <= 5); /* 4 color + D/S */
+ /* We can have at most max_color_RTs + 1 D/S attachments */
+ assert(attachmentCount <=
+ V3D_MAX_RENDER_TARGETS(cmd_buffer->device->devinfo.ver) + 1);
+ /* We can only clear attachments in the current subpass */
struct v3dv_render_pass *pass = cmd_buffer->state.pass;
assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
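For context, V3D_MAX_RENDER_TARGETS replaces the fixed MAX_RENDER_TARGETS constant removed from v3dv_limits.h above; a plausible definition consistent with this patch (the values are an assumption, with 7.1 doubling the 4.2 limit):

   #define V3D_MAX_RENDER_TARGETS(ver) ((ver) < 71 ? 4 : 8)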
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_meta_copy.c b/lib/mesa/src/broadcom/vulkan/v3dv_meta_copy.c
index 4d83e5379..f9779bf26 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_meta_copy.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_meta_copy.c
@@ -351,18 +351,37 @@ get_compatible_tlb_format(VkFormat format)
* Checks if we can implement an image copy or clear operation using the TLB
* hardware.
*
+ * The extent and miplevel are only used to validate tile stores (to match the
+ * region to store against the miplevel dimensions to avoid cases where
+ * the region to store is not aligned to tile boundaries). If extent is
+ * NULL no checks are done (which is fine if the image will only be used for a
+ * TLB load or when we know in advance that the store will be for the entire
+ * size of the image miplevel).
+ *
* For tlb copies we are doing a per-plane copy, so for multi-plane formats,
* the compatible format will be single-plane.
*/
bool
v3dv_meta_can_use_tlb(struct v3dv_image *image,
uint8_t plane,
+ uint8_t miplevel,
const VkOffset3D *offset,
+ const VkExtent3D *extent,
VkFormat *compat_format)
{
if (offset->x != 0 || offset->y != 0)
return false;
+   /* FIXME: this is suboptimal; what we really want to check is that the
+ * extent of the region to copy is the full slice or a multiple of the
+ * tile size.
+ */
+ if (extent) {
+ struct v3d_resource_slice *slice = &image->planes[plane].slices[miplevel];
+ if (slice->width != extent->width || slice->height != extent->height)
+ return false;
+ }
+
if (image->format->planes[plane].rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
if (compat_format)
*compat_format = image->planes[plane].vk_format;
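The updated call sites below follow one pattern: pass the region extent when the image is a TLB store destination and NULL when it is only loaded. For example, the copy_image_tlb change further down reads:

   if (!v3dv_meta_can_use_tlb(src, src_plane, region->srcSubresource.mipLevel,
                              &region->srcOffset, NULL, &fb_format) ||
       !v3dv_meta_can_use_tlb(dst, dst_plane, region->dstSubresource.mipLevel,
                              &region->dstOffset, &region->extent, &fb_format))
      return false;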
@@ -403,8 +422,11 @@ copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
assert(plane < image->plane_count);
- if (!v3dv_meta_can_use_tlb(image, plane, &region->imageOffset, &fb_format))
+ if (!v3dv_meta_can_use_tlb(image, plane, region->imageSubresource.mipLevel,
+ &region->imageOffset, &region->imageExtent,
+ &fb_format)) {
return false;
+ }
uint32_t internal_type, internal_bpp;
v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
@@ -431,8 +453,9 @@ copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
- v3dv_job_start_frame(job, width, height, num_layers, false, true,
- 1, internal_bpp, false);
+ v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
+ internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
+ false);
struct v3dv_meta_framebuffer framebuffer;
v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
@@ -459,26 +482,89 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
VkFilter filter,
bool dst_is_padded_image);
+
/**
- * Returns true if the implementation supports the requested operation (even if
- * it failed to process it, for example, due to an out-of-memory error).
+ * A structure that contains all the information we may need in various
+ * processes involving image to buffer copies implemented with blit paths.
*/
-static bool
-copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_buffer *buffer,
- struct v3dv_image *image,
- const VkBufferImageCopy2 *region)
+struct image_to_buffer_info {
+ /* Source image info */
+ VkFormat src_format;
+ uint8_t plane;
+ VkColorComponentFlags cmask;
+ VkComponentMapping cswizzle;
+ VkImageAspectFlags src_copy_aspect;
+ uint32_t block_width;
+ uint32_t block_height;
+
+ /* Destination buffer info */
+ VkFormat dst_format;
+ uint32_t buf_width;
+ uint32_t buf_height;
+ uint32_t buf_bpp;
+ VkImageAspectFlags dst_copy_aspect;
+};
+
+static VkImageBlit2
+blit_region_for_image_to_buffer(const VkOffset3D *offset,
+ const VkExtent3D *extent,
+ uint32_t mip_level,
+ uint32_t base_layer,
+ uint32_t layer_offset,
+ struct image_to_buffer_info *info)
{
- bool handled = false;
+ VkImageBlit2 output = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
+ .srcSubresource = {
+ .aspectMask = info->src_copy_aspect,
+ .mipLevel = mip_level,
+ .baseArrayLayer = base_layer + layer_offset,
+ .layerCount = 1,
+ },
+ .srcOffsets = {
+ {
+ DIV_ROUND_UP(offset->x, info->block_width),
+ DIV_ROUND_UP(offset->y, info->block_height),
+ offset->z + layer_offset,
+ },
+ {
+ DIV_ROUND_UP(offset->x + extent->width, info->block_width),
+ DIV_ROUND_UP(offset->y + extent->height, info->block_height),
+ offset->z + layer_offset + 1,
+ },
+ },
+ .dstSubresource = {
+ .aspectMask = info->dst_copy_aspect,
+ .mipLevel = 0,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ .dstOffsets = {
+ { 0, 0, 0 },
+ {
+ DIV_ROUND_UP(extent->width, info->block_width),
+ DIV_ROUND_UP(extent->height, info->block_height),
+ 1
+ },
+ },
+ };
- /* This path uses a shader blit which doesn't support linear images. Return
- * early to avoid all the heavy lifting in preparation for the
- * blit_shader() call that is bound to fail in that scenario.
- */
- if (image->vk.tiling == VK_IMAGE_TILING_LINEAR &&
- image->vk.image_type != VK_IMAGE_TYPE_1D) {
- return handled;
- }
+ return output;
+}
+
+/**
+ * Produces an image_to_buffer_info struct from a VkBufferImageCopy2 that we can
+ * use to implement buffer to image copies with blit paths.
+ *
+ * Returns false if the copy operation can't be implemented with a blit.
+ */
+static bool
+gather_image_to_buffer_info(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_image *image,
+ const VkBufferImageCopy2 *region,
+ struct image_to_buffer_info *out_info)
+{
+ bool supported = false;
VkImageAspectFlags dst_copy_aspect = region->imageSubresource.aspectMask;
/* For multi-planar images we copy one plane at a time using an image alias
@@ -572,7 +658,7 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
break;
default:
unreachable("unsupported aspect");
- return handled;
+ return supported;
};
break;
case 2:
@@ -588,7 +674,7 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
break;
default:
unreachable("unsupported bit-size");
- return handled;
+ return supported;
};
/* The hardware doesn't support linear depth/stencil stores, so we
@@ -600,7 +686,7 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
dst_copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
/* We should be able to handle the blit if we got this far */
- handled = true;
+ supported = true;
/* Obtain the 2D buffer region spec */
uint32_t buf_width, buf_height;
@@ -619,98 +705,246 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
vk_format_get_blockwidth(image->planes[plane].vk_format);
uint32_t block_height =
vk_format_get_blockheight(image->planes[plane].vk_format);
- buf_width = buf_width / block_width;
- buf_height = buf_height / block_height;
+ buf_width = DIV_ROUND_UP(buf_width, block_width);
+ buf_height = DIV_ROUND_UP(buf_height, block_height);
+
+ out_info->src_format = src_format;
+ out_info->dst_format = dst_format;
+ out_info->src_copy_aspect = src_copy_aspect;
+ out_info->dst_copy_aspect = dst_copy_aspect;
+ out_info->buf_width = buf_width;
+ out_info->buf_height = buf_height;
+ out_info->buf_bpp = buffer_bpp;
+ out_info->block_width = block_width;
+ out_info->block_height = block_height;
+ out_info->cmask = cmask;
+ out_info->cswizzle = cswizzle;
+ out_info->plane = plane;
+
+ return supported;
+}
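A quick check of the DIV_ROUND_UP change above: for a block-compressed format with a 4x4 block, a 10-texel buffer row spans three blocks, which the previous plain division under-counted:

   assert(DIV_ROUND_UP(10, 4) == 3);  /* 10 / 4 == 2 would drop the partial block */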
- /* Compute layers to copy */
- uint32_t num_layers;
- if (image->vk.image_type != VK_IMAGE_TYPE_3D)
- num_layers = region->imageSubresource.layerCount;
- else
- num_layers = region->imageExtent.depth;
- assert(num_layers > 0);
+/* Creates a linear image to alias buffer memory. It also includes that image
+ * as a private object in the cmd_buffer.
+ *
+ * This is used for cases where we want to implement an image to buffer copy,
+ * but we need to rely on a mechanism that uses an image as destination, like
+ * blitting.
+ */
+static VkResult
+create_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_buffer *buffer,
+ const VkBufferImageCopy2 *region,
+ struct image_to_buffer_info *info,
+ uint32_t layer,
+ VkImage *out_image)
+{
+ VkImageCreateInfo image_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = info->dst_format,
+ .extent = { info->buf_width, info->buf_height, 1 },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .tiling = VK_IMAGE_TILING_LINEAR,
+ .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
- /* Our blit interface can see the real format of the images to detect
- * copies between compressed and uncompressed images and adapt the
- * blit region accordingly. Here we are just doing a raw copy of
- * compressed data, but we are passing an uncompressed view of the
- * buffer for the blit destination image (since compressed formats are
- * not renderable), so we also want to provide an uncompressed view of
- * the source image.
- */
VkResult result;
struct v3dv_device *device = cmd_buffer->device;
VkDevice _device = v3dv_device_to_handle(device);
- if (vk_format_is_compressed(image->vk.format)) {
- assert(image->plane_count == 1);
- VkImage uiview;
- VkImageCreateInfo uiview_info = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
- .imageType = VK_IMAGE_TYPE_3D,
- .format = dst_format,
- .extent = { buf_width, buf_height, image->vk.extent.depth },
- .mipLevels = image->vk.mip_levels,
- .arrayLayers = image->vk.array_layers,
- .samples = image->vk.samples,
- .tiling = image->vk.tiling,
- .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = 0,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- };
- result = v3dv_CreateImage(_device, &uiview_info, &device->vk.alloc, &uiview);
- if (result != VK_SUCCESS)
- return handled;
- v3dv_cmd_buffer_add_private_obj(
- cmd_buffer, (uintptr_t)uiview,
- (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
+ VkImage buffer_image;
+ result =
+ v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
+ if (result != VK_SUCCESS)
+ return result;
- result =
- vk_common_BindImageMemory(_device, uiview,
- v3dv_device_memory_to_handle(image->planes[plane].mem),
- image->planes[plane].mem_offset);
- if (result != VK_SUCCESS)
- return handled;
+ *out_image = buffer_image;
+
+ v3dv_cmd_buffer_add_private_obj(
+ cmd_buffer, (uintptr_t)buffer_image,
+ (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
+
+ /* Bind the buffer memory to the image
+ */
+ VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
+ layer * info->buf_width * info->buf_height * info->buf_bpp;
+
+ result =
+ vk_common_BindImageMemory(_device, buffer_image,
+ v3dv_device_memory_to_handle(buffer->mem),
+ buffer_offset);
+ return result;
+}
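Worked example of the per-layer offset computed above, with hypothetical numbers: for a 256x256 region at buf_bpp = 4, layer 2 begins at

   VkDeviceSize offset = buffer->mem_offset + region->bufferOffset +
                         2 * 256 * 256 * 4;  /* +524288 bytes for layer 2 */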
+
+/**
+ * Creates an image with a single mip level that aliases the memory of a
+ * mip level in another image, re-interpreting the memory with an uncompressed
+ * format. The image is added to the command buffer as a private object for
+ * disposal.
+ */
+static bool
+create_image_mip_level_alias(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_image *image,
+ VkFormat format,
+ uint32_t plane,
+ uint32_t mip_level,
+ uint32_t layer,
+ VkImage *alias)
+{
+ VkResult result;
+ assert(!vk_format_is_compressed(format));
+
+ struct v3dv_device *device = cmd_buffer->device;
+ VkDevice vk_device = v3dv_device_to_handle(device);
+ uint32_t mip_width = image->planes[plane].slices[mip_level].width;
+ uint32_t mip_height = image->planes[plane].slices[mip_level].height;
+
+ uint32_t block_width =
+ vk_format_get_blockwidth(image->planes[plane].vk_format);
+ uint32_t block_height =
+ vk_format_get_blockheight(image->planes[plane].vk_format);
+
+ VkImageCreateInfo info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .imageType = image->vk.image_type,
+ .format = format,
+ .extent = { DIV_ROUND_UP(mip_width, block_width),
+ DIV_ROUND_UP(mip_height, block_height),
+ 1 },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = image->vk.samples,
+ .tiling = image->tiled ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
+ .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+ result = v3dv_CreateImage(vk_device, &info, &device->vk.alloc, alias);
+ if (result != VK_SUCCESS)
+ return false;
+
+ /* The alias we have just created has just one mip, but we may be aliasing
+ * any mip in the original image. Because the slice setup changes based on
+ * the mip (particularly, for mips >= 2 it uses power of 2 sizes internally)
+ * and this can influence the tiling layout selected for the slice, we want
+ * to make sure we copy the slice description from the actual mip level in
+ * the original image, and then rewrite any fields that we need for the
+ * alias. Particularly, we want to make the offset 0 because we are going to
+ * bind the underlying image memory exactly at the start of the selected mip.
+ * We also want to relax the image alignment requirements to the minimum
+ * (the one imposed by the Texture Base Address field) since we may not be
+ * aliasing a level 0 (for which we typically want a page alignment for
+ * optimal performance).
+ */
+ V3DV_FROM_HANDLE(v3dv_image, v3dv_alias, *alias);
+ v3dv_alias->planes[plane].slices[0] = image->planes[plane].slices[mip_level];
+ v3dv_alias->planes[plane].slices[0].width = info.extent.width;
+ v3dv_alias->planes[plane].slices[0].height = info.extent.height;
+ v3dv_alias->planes[plane].slices[0].offset = 0;
+ v3dv_alias->planes[plane].alignment = 64;
+
+ v3dv_cmd_buffer_add_private_obj(
+ cmd_buffer, (uintptr_t)*alias,
+ (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
+
+ result =
+ vk_common_BindImageMemory(vk_device, *alias,
+ v3dv_device_memory_to_handle(image->planes[plane].mem),
+ v3dv_layer_offset(image, mip_level, layer, plane));
+ return result == VK_SUCCESS;
+}
+
+/**
+ * Returns true if the implementation supports the requested operation (even if
+ * it failed to process it, for example, due to an out-of-memory error).
+ */
+static bool
+copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_buffer *buffer,
+ struct v3dv_image *image,
+ const VkBufferImageCopy2 *region)
+{
+ bool handled = false;
+ struct image_to_buffer_info info;
- image = v3dv_image_from_handle(uiview);
+ /* This path uses a shader blit which doesn't support linear images. Return
+ * early to avoid all the heavy lifting in preparation for the
+ * blit_shader() call that is bound to fail in that scenario.
+ */
+ if (!image->tiled && image->vk.image_type != VK_IMAGE_TYPE_1D) {
+ return handled;
}
+ handled = gather_image_to_buffer_info(cmd_buffer, image, region,
+ &info);
+
+ if (!handled)
+ return handled;
+
+ /* We should be able to handle the blit if we got this far */
+ handled = true;
+
+ /* Compute layers to copy */
+ uint32_t num_layers;
+ if (image->vk.image_type != VK_IMAGE_TYPE_3D)
+ num_layers = region->imageSubresource.layerCount;
+ else
+ num_layers = region->imageExtent.depth;
+ assert(num_layers > 0);
+
/* Copy requested layers */
+ VkResult result;
+ VkImageBlit2 blit_region;
+ uint32_t mip_level = region->imageSubresource.mipLevel;
+ uint32_t base_layer = region->imageSubresource.baseArrayLayer;
for (uint32_t i = 0; i < num_layers; i++) {
- /* Create the destination blit image from the destination buffer */
- VkImageCreateInfo image_info = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
- .imageType = VK_IMAGE_TYPE_2D,
- .format = dst_format,
- .extent = { buf_width, buf_height, 1 },
- .mipLevels = 1,
- .arrayLayers = 1,
- .samples = VK_SAMPLE_COUNT_1_BIT,
- .tiling = VK_IMAGE_TILING_LINEAR,
- .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = 0,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- };
-
- VkImage buffer_image;
- result =
- v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
- if (result != VK_SUCCESS)
- return handled;
+ uint32_t layer_offset = i;
+
+ if (vk_format_is_compressed(image->vk.format)) {
+ /* Our blit interface can see the real format of the images to detect
+ * copies between compressed and uncompressed images and adapt the
+ * blit region accordingly. Here we are just doing a raw copy of
+ * compressed data, but we are passing an uncompressed view of the
+ * buffer for the blit destination image (since compressed formats are
+ * not renderable), so we also want to provide an uncompressed view of
+ * the source image.
+ *
+ * It is important that we create the alias over the selected mip
+ * level (instead of aliasing the entire image) because an uncompressed
+ * view of the image won't have the same number of mip levels as the
+ * original image and the implicit mip size calculations the hw will
+ * do to sample from a non-zero mip level may not match exactly between
+ * compressed and uncompressed views.
+ */
+ VkImage alias;
+ if (!create_image_mip_level_alias(cmd_buffer, image, info.dst_format,
+ info.plane, mip_level,
+ base_layer + layer_offset,
+ &alias)) {
+ return handled;
+ }
- v3dv_cmd_buffer_add_private_obj(
- cmd_buffer, (uintptr_t)buffer_image,
- (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
+ /* We are aliasing the selected mip level and layer with a
+ * single-mip and single-layer image.
+ */
+ image = v3dv_image_from_handle(alias);
+ mip_level = 0;
+ base_layer = 0;
+ layer_offset = 0;
+ }
- /* Bind the buffer memory to the image */
- VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
- i * buf_width * buf_height * buffer_bpp;
+ /* Create the destination blit image from the destination buffer */
+ VkImage buffer_image;
result =
- vk_common_BindImageMemory(_device, buffer_image,
- v3dv_device_memory_to_handle(buffer->mem),
- buffer_offset);
+ create_image_from_buffer(cmd_buffer, buffer, region, &info,
+ i, &buffer_image);
if (result != VK_SUCCESS)
return handled;
@@ -722,48 +956,17 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
* image, but that we need to blit to a S8D24 destination (the only
* stencil format we support).
*/
- const VkImageBlit2 blit_region = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
- .srcSubresource = {
- .aspectMask = src_copy_aspect,
- .mipLevel = region->imageSubresource.mipLevel,
- .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
- .layerCount = 1,
- },
- .srcOffsets = {
- {
- DIV_ROUND_UP(region->imageOffset.x, block_width),
- DIV_ROUND_UP(region->imageOffset.y, block_height),
- region->imageOffset.z + i,
- },
- {
- DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
- block_width),
- DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
- block_height),
- region->imageOffset.z + i + 1,
- },
- },
- .dstSubresource = {
- .aspectMask = dst_copy_aspect,
- .mipLevel = 0,
- .baseArrayLayer = 0,
- .layerCount = 1,
- },
- .dstOffsets = {
- { 0, 0, 0 },
- {
- DIV_ROUND_UP(region->imageExtent.width, block_width),
- DIV_ROUND_UP(region->imageExtent.height, block_height),
- 1
- },
- },
- };
+ blit_region =
+ blit_region_for_image_to_buffer(&region->imageOffset,
+ &region->imageExtent,
+ mip_level, base_layer, layer_offset,
+ &info);
handled = blit_shader(cmd_buffer,
- v3dv_image_from_handle(buffer_image), dst_format,
- image, src_format,
- cmask, &cswizzle,
+ v3dv_image_from_handle(buffer_image),
+ info.dst_format,
+ image, info.src_format,
+ info.cmask, &info.cswizzle,
&blit_region, VK_FILTER_NEAREST, false);
if (!handled) {
/* This is unexpected, we should have a supported blit spec */
@@ -776,6 +979,107 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
return true;
}
+static bool
+copy_image_linear_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_image *dst,
+ struct v3dv_image *src,
+ const VkImageCopy2 *region);
+
+static VkImageCopy2
+image_copy_region_for_image_to_buffer(const VkBufferImageCopy2 *region,
+ struct image_to_buffer_info *info,
+ uint32_t layer)
+{
+ VkImageCopy2 output = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2,
+ .srcSubresource = {
+ .aspectMask = info->src_copy_aspect,
+ .mipLevel = region->imageSubresource.mipLevel,
+ .baseArrayLayer = region->imageSubresource.baseArrayLayer + layer,
+ .layerCount = 1,
+ },
+ .srcOffset = {
+ DIV_ROUND_UP(region->imageOffset.x, info->block_width),
+ DIV_ROUND_UP(region->imageOffset.y, info->block_height),
+ region->imageOffset.z,
+ },
+ .dstSubresource = {
+ .aspectMask = info->dst_copy_aspect,
+ .mipLevel = 0,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ .dstOffset = { 0, 0, 0 },
+ .extent = {
+ DIV_ROUND_UP(region->imageExtent.width, info->block_width),
+ DIV_ROUND_UP(region->imageExtent.height, info->block_height),
+ 1
+ },
+ };
+
+ return output;
+}
+
+/**
+ * Returns true if the implementation supports the requested operation (even if
+ * it failed to process it, for example, due to an out-of-memory error).
+ */
+static bool
+copy_image_to_buffer_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_buffer *dst_buffer,
+ struct v3dv_image *src_image,
+ const VkBufferImageCopy2 *region)
+{
+ bool handled = false;
+ VkImage dst_buffer_image;
+ struct image_to_buffer_info info;
+
+   /* This is a requirement for copy_image_linear_texel_buffer below. We check
+    * it in advance so we can return early.
+    */
+ if (src_image->tiled)
+ return false;
+
+ handled =
+ gather_image_to_buffer_info(cmd_buffer, src_image, region,
+ &info);
+ if (!handled)
+ return handled;
+
+   /* At this point the implementation should support the copy; any possible
+    * errors below are due to other reasons, such as an out-of-memory error.
+    */
+ handled = true;
+
+ uint32_t num_layers;
+ if (src_image->vk.image_type != VK_IMAGE_TYPE_3D)
+ num_layers = region->imageSubresource.layerCount;
+ else
+ num_layers = region->imageExtent.depth;
+ assert(num_layers > 0);
+
+ VkResult result;
+ VkImageCopy2 image_region;
+ for (uint32_t layer = 0; layer < num_layers; layer++) {
+ /* Create the destination image from the destination buffer */
+ result =
+ create_image_from_buffer(cmd_buffer, dst_buffer, region, &info,
+ layer, &dst_buffer_image);
+ if (result != VK_SUCCESS)
+ return handled;
+
+ image_region =
+ image_copy_region_for_image_to_buffer(region, &info, layer);
+
+ handled =
+ copy_image_linear_texel_buffer(cmd_buffer,
+ v3dv_image_from_handle(dst_buffer_image),
+ src_image, &image_region);
+ }
+
+ return handled;
+}
+
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
const VkCopyImageToBufferInfo2 *info)
@@ -798,6 +1102,9 @@ v3dv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, region))
continue;
+ if (copy_image_to_buffer_texel_buffer(cmd_buffer, buffer, image, region))
+ continue;
+
unreachable("Unsupported image to buffer copy.");
}
cmd_buffer->state.is_transfer = false;
@@ -819,7 +1126,7 @@ copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
}
/* Destination can't be raster format */
- if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR)
+ if (!dst->tiled)
return false;
/* We can only do full copies, so if the format is D24S8 both aspects need
@@ -947,6 +1254,15 @@ copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
return true;
}
+inline bool
+v3dv_cmd_buffer_copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_image *dst,
+ struct v3dv_image *src,
+ const VkImageCopy2 *region)
+{
+ return copy_image_tfu(cmd_buffer, dst, src, region);
+}
+
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
@@ -965,9 +1281,12 @@ copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
assert(dst_plane < dst->plane_count);
VkFormat fb_format;
- if (!v3dv_meta_can_use_tlb(src, src_plane, &region->srcOffset, &fb_format) ||
- !v3dv_meta_can_use_tlb(dst, dst_plane, &region->dstOffset, &fb_format))
+ if (!v3dv_meta_can_use_tlb(src, src_plane, region->srcSubresource.mipLevel,
+ &region->srcOffset, NULL, &fb_format) ||
+ !v3dv_meta_can_use_tlb(dst, dst_plane, region->dstSubresource.mipLevel,
+ &region->dstOffset, &region->extent, &fb_format)) {
return false;
+ }
/* From the Vulkan spec, VkImageCopy valid usage:
*
@@ -1013,8 +1332,8 @@ copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
- v3dv_job_start_frame(job, width, height, num_layers,
- false, true, 1, internal_bpp,
+ v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
+ internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
src->vk.samples > VK_SAMPLE_COUNT_1_BIT);
struct v3dv_meta_framebuffer framebuffer;
@@ -1066,7 +1385,7 @@ create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
.mipLevels = src->vk.mip_levels,
.arrayLayers = src->vk.array_layers,
.samples = src->vk.samples,
- .tiling = src->vk.tiling,
+ .tiling = src->tiled ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
.usage = src->vk.usage,
};
@@ -1094,8 +1413,7 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *src,
const VkImageCopy2 *region)
{
- if (src->vk.tiling == VK_IMAGE_TILING_LINEAR &&
- src->vk.image_type != VK_IMAGE_TYPE_1D)
+ if (!src->tiled && src->vk.image_type != VK_IMAGE_TYPE_1D)
return false;
uint8_t src_plane =
@@ -1207,14 +1525,21 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
* (since the region dimensions are already specified in terms of the source
* image).
*/
+ uint32_t region_width = region->extent.width * src_scale_w;
+ uint32_t region_height = region->extent.height * src_scale_h;
+ if (src_block_w > 1)
+ region_width = util_next_power_of_two(region_width);
+ if (src_block_h > 1)
+ region_height = util_next_power_of_two(region_height);
+
const VkOffset3D src_start = {
region->srcOffset.x * src_scale_w,
region->srcOffset.y * src_scale_h,
region->srcOffset.z,
};
const VkOffset3D src_end = {
- src_start.x + region->extent.width * src_scale_w,
- src_start.y + region->extent.height * src_scale_h,
+ src_start.x + region_width,
+ src_start.y + region_height,
src_start.z + region->extent.depth,
};
@@ -1224,8 +1549,8 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
region->dstOffset.z,
};
const VkOffset3D dst_end = {
- dst_start.x + region->extent.width * src_scale_w,
- dst_start.y + region->extent.height * src_scale_h,
+ dst_start.x + region_width,
+ dst_start.y + region_height,
dst_start.z + region->extent.depth,
};
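Worked example of the power-of-two rounding above for a compressed source (src_block_w/h > 1): a region of 12x5 scaled units blits as 16x8, matching the power-of-two padded slice sizes mentioned earlier for small mips:

   region_width  = util_next_power_of_two(12);  /* -> 16 */
   region_height = util_next_power_of_two(5);   /* -> 8  */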
@@ -1253,7 +1578,7 @@ copy_image_linear_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *src,
const VkImageCopy2 *region)
{
- if (src->vk.tiling != VK_IMAGE_TILING_LINEAR)
+ if (src->tiled)
return false;
/* Implementations are allowed to restrict linear images like this */
@@ -1507,7 +1832,7 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
/* Destination can't be raster format */
- if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
+ if (!image->tiled)
return false;
/* We can't copy D24S8 because buffer to image copies only copy one aspect
@@ -1539,11 +1864,13 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
else
height = region->bufferImageHeight;
- uint8_t plane =
+ const uint8_t plane =
v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
- if (width != image->planes[plane].width ||
- height != image->planes[plane].height)
+ const uint32_t mip_level = region->imageSubresource.mipLevel;
+ const struct v3d_resource_slice *slice = &image->planes[plane].slices[mip_level];
+
+ if (width != slice->width || height != slice->height)
return false;
/* Handle region semantics for compressed images */
@@ -1566,9 +1893,6 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
assert(format->plane_count == 1);
const struct v3dv_format_plane *format_plane = &format->planes[0];
- const uint32_t mip_level = region->imageSubresource.mipLevel;
- const struct v3d_resource_slice *slice = &image->planes[plane].slices[mip_level];
-
uint32_t num_layers;
if (image->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
@@ -1631,8 +1955,11 @@ copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
assert(plane < image->plane_count);
- if (!v3dv_meta_can_use_tlb(image, plane, &region->imageOffset, &fb_format))
+ if (!v3dv_meta_can_use_tlb(image, plane, region->imageSubresource.mipLevel,
+ &region->imageOffset, &region->imageExtent,
+ &fb_format)) {
return false;
+ }
uint32_t internal_type, internal_bpp;
v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
@@ -1659,8 +1986,9 @@ copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
- v3dv_job_start_frame(job, width, height, num_layers, false, true,
- 1, internal_bpp, false);
+ v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
+ internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
+ false);
struct v3dv_meta_framebuffer framebuffer;
v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
@@ -1832,7 +2160,7 @@ get_texel_buffer_copy_vs()
glsl_vec4_type(), "gl_Position");
vs_out_pos->data.location = VARYING_SLOT_POS;
- nir_ssa_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
+ nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
nir_store_var(&b, vs_out_pos, pos, 0xf);
return b.shader;
@@ -1855,8 +2183,8 @@ get_texel_buffer_copy_gs()
nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
(1ull << VARYING_SLOT_LAYER);
- nir->info.gs.input_primitive = SHADER_PRIM_TRIANGLES;
- nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
+ nir->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
+ nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
nir->info.gs.vertices_in = 3;
nir->info.gs.vertices_out = 3;
nir->info.gs.invocations = 1;
@@ -1889,7 +2217,7 @@ get_texel_buffer_copy_gs()
nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
/* gl_Layer from push constants */
- nir_ssa_def *layer =
+ nir_def *layer =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
.base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET,
.range = 4);
@@ -1903,7 +2231,7 @@ get_texel_buffer_copy_gs()
return nir;
}
-static nir_ssa_def *
+static nir_def *
load_frag_coord(nir_builder *b)
{
nir_foreach_shader_in_variable(var, b->shader) {
@@ -1967,24 +2295,24 @@ get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
/* Load the box describing the pixel region we want to copy from the
* texel buffer.
*/
- nir_ssa_def *box =
+ nir_def *box =
nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),
.base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,
.range = 16);
/* Load the buffer stride (this comes in texel units) */
- nir_ssa_def *stride =
+ nir_def *stride =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
.base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,
.range = 4);
/* Load the buffer offset (this comes in texel units) */
- nir_ssa_def *offset =
+ nir_def *offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
.base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,
.range = 4);
- nir_ssa_def *coord = nir_f2i32(&b, load_frag_coord(&b));
+ nir_def *coord = nir_f2i32(&b, load_frag_coord(&b));
/* Load pixel data from texel buffer based on the x,y offset of the pixel
* within the box. Texel buffers are 1D arrays of texels.
@@ -1994,28 +2322,26 @@ get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
    * texel buffer should always be within its bounds and we don't need
* to add a check for that here.
*/
- nir_ssa_def *x_offset =
+ nir_def *x_offset =
nir_isub(&b, nir_channel(&b, coord, 0),
nir_channel(&b, box, 0));
- nir_ssa_def *y_offset =
+ nir_def *y_offset =
nir_isub(&b, nir_channel(&b, coord, 1),
nir_channel(&b, box, 1));
- nir_ssa_def *texel_offset =
+ nir_def *texel_offset =
nir_iadd(&b, nir_iadd(&b, offset, x_offset),
nir_imul(&b, y_offset, stride));
- nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
+ nir_def *tex_deref = &nir_build_deref_var(&b, sampler)->def;
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
tex->op = nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(texel_offset);
- tex->src[1].src_type = nir_tex_src_texture_deref;
- tex->src[1].src = nir_src_for_ssa(tex_deref);
+ tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, texel_offset);
+ tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, tex_deref);
tex->dest_type = nir_type_uint32;
tex->is_array = false;
tex->coord_components = 1;
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "texel buffer result");
+ nir_def_init(&tex->instr, &tex->def, 4, 32);
nir_builder_instr_insert(&b, &tex->instr);
uint32_t swiz[4];
@@ -2027,7 +2353,7 @@ get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);
swiz[3] =
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);
- nir_ssa_def *s = nir_swizzle(&b, &tex->dest.ssa, swiz, 4);
+ nir_def *s = nir_swizzle(&b, &tex->def, swiz, 4);
nir_store_var(&b, fs_out_color, s, 0xf);
return b.shader;
@@ -2883,76 +3209,6 @@ copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
}
}
-/**
- * Returns true if the implementation supports the requested operation (even if
- * it failed to process it, for example, due to an out-of-memory error).
- */
-static bool
-copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_image *image,
- struct v3dv_buffer *buffer,
- const VkBufferImageCopy2 *region)
-{
- /* FIXME */
- if (vk_format_is_depth_or_stencil(image->vk.format))
- return false;
-
- if (vk_format_is_compressed(image->vk.format))
- return false;
-
- if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
- return false;
-
- uint32_t buffer_width, buffer_height;
- if (region->bufferRowLength == 0)
- buffer_width = region->imageExtent.width;
- else
- buffer_width = region->bufferRowLength;
-
- if (region->bufferImageHeight == 0)
- buffer_height = region->imageExtent.height;
- else
- buffer_height = region->bufferImageHeight;
-
- uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
- assert(plane < image->plane_count);
-
- uint32_t buffer_stride = buffer_width * image->planes[plane].cpp;
- uint32_t buffer_layer_stride = buffer_stride * buffer_height;
-
- uint32_t num_layers;
- if (image->vk.image_type != VK_IMAGE_TYPE_3D)
- num_layers = region->imageSubresource.layerCount;
- else
- num_layers = region->imageExtent.depth;
- assert(num_layers > 0);
-
- struct v3dv_job *job =
- v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
- V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
- cmd_buffer, -1);
- if (!job)
- return true;
-
- job->cpu.copy_buffer_to_image.image = image;
- job->cpu.copy_buffer_to_image.buffer = buffer;
- job->cpu.copy_buffer_to_image.buffer_stride = buffer_stride;
- job->cpu.copy_buffer_to_image.buffer_layer_stride = buffer_layer_stride;
- job->cpu.copy_buffer_to_image.buffer_offset = region->bufferOffset;
- job->cpu.copy_buffer_to_image.image_extent = region->imageExtent;
- job->cpu.copy_buffer_to_image.image_offset = region->imageOffset;
- job->cpu.copy_buffer_to_image.mip_level =
- region->imageSubresource.mipLevel;
- job->cpu.copy_buffer_to_image.base_layer =
- region->imageSubresource.baseArrayLayer;
- job->cpu.copy_buffer_to_image.layer_count = num_layers;
- job->cpu.copy_buffer_to_image.plane = plane;
-
- list_addtail(&job->list_link, &cmd_buffer->jobs);
-
- return true;
-}
-
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
const VkCopyBufferToImageInfo2 *info)
@@ -3013,11 +3269,6 @@ v3dv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
* slow it might not be worth it and we should instead put more effort
* in handling more cases with the other paths.
*/
- if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer, &info->pRegions[r])) {
- batch_size = 1;
- goto handled;
- }
-
if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
batch_size, &info->pRegions[r], false)) {
goto handled;
@@ -3072,7 +3323,7 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
return false;
/* Destination can't be raster format */
- if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR)
+ if (!dst->tiled)
return false;
/* Source region must start at (0,0) */
@@ -3301,16 +3552,16 @@ create_blit_render_pass(struct v3dv_device *device,
return result == VK_SUCCESS;
}
-static nir_ssa_def *
+static nir_def *
gen_tex_coords(nir_builder *b)
{
- nir_ssa_def *tex_box =
+ nir_def *tex_box =
nir_load_push_constant(b, 4, 32, nir_imm_int(b, 0), .base = 0, .range = 16);
- nir_ssa_def *tex_z =
+ nir_def *tex_z =
nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
- nir_ssa_def *vertex_id = nir_load_vertex_id(b);
+ nir_def *vertex_id = nir_load_vertex_id(b);
/* vertex 0: src0_x, src0_y
* vertex 1: src0_x, src1_y
@@ -3323,11 +3574,11 @@ gen_tex_coords(nir_builder *b)
* channel 1 is vertex id & 1 ? src1_y : src0_y
*/
- nir_ssa_def *one = nir_imm_int(b, 1);
- nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
- nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
+ nir_def *one = nir_imm_int(b, 1);
+ nir_def *c0cmp = nir_ilt_imm(b, vertex_id, 2);
+ nir_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
- nir_ssa_def *comp[4];
+ nir_def *comp[4];
comp[0] = nir_bcsel(b, c0cmp,
nir_channel(b, tex_box, 0),
nir_channel(b, tex_box, 2));
@@ -3340,9 +3591,9 @@ gen_tex_coords(nir_builder *b)
return nir_vec(b, comp, 4);
}
-static nir_ssa_def *
+static nir_def *
build_nir_tex_op_read(struct nir_builder *b,
- nir_ssa_def *tex_pos,
+ nir_def *tex_pos,
enum glsl_base_type tex_type,
enum glsl_sampler_dim dim)
{
@@ -3355,57 +3606,49 @@ build_nir_tex_op_read(struct nir_builder *b,
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
- nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
+ nir_def *tex_deref = &nir_build_deref_var(b, sampler)->def;
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
tex->sampler_dim = dim;
tex->op = nir_texop_tex;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(tex_pos);
- tex->src[1].src_type = nir_tex_src_texture_deref;
- tex->src[1].src = nir_src_for_ssa(tex_deref);
- tex->src[2].src_type = nir_tex_src_sampler_deref;
- tex->src[2].src = nir_src_for_ssa(tex_deref);
+ tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, tex_pos);
+ tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, tex_deref);
+ tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref, tex_deref);
tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
tex->is_array = glsl_sampler_type_is_array(sampler_type);
tex->coord_components = tex_pos->num_components;
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_def_init(&tex->instr, &tex->def, 4, 32);
nir_builder_instr_insert(b, &tex->instr);
- return &tex->dest.ssa;
+ return &tex->def;
}
-static nir_ssa_def *
+static nir_def *
build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
nir_variable *sampler,
- nir_ssa_def *tex_deref,
+ nir_def *tex_deref,
enum glsl_base_type tex_type,
- nir_ssa_def *tex_pos,
- nir_ssa_def *sample_idx)
+ nir_def *tex_pos,
+ nir_def *sample_idx)
{
- nir_tex_instr *tex = nir_tex_instr_create(b->shader, 4);
+ nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
tex->op = nir_texop_txf_ms;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(tex_pos);
- tex->src[1].src_type = nir_tex_src_texture_deref;
- tex->src[1].src = nir_src_for_ssa(tex_deref);
- tex->src[2].src_type = nir_tex_src_sampler_deref;
- tex->src[2].src = nir_src_for_ssa(tex_deref);
- tex->src[3].src_type = nir_tex_src_ms_index;
- tex->src[3].src = nir_src_for_ssa(sample_idx);
+ tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, tex_pos);
+ tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, tex_deref);
+ tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_ms_index, sample_idx);
tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
tex->is_array = false;
tex->coord_components = tex_pos->num_components;
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_def_init(&tex->instr, &tex->def, 4, 32);
nir_builder_instr_insert(b, &tex->instr);
- return &tex->dest.ssa;
+ return &tex->def;
}
/* Fetches all samples at the given position and averages them */
-static nir_ssa_def *
+static nir_def *
build_nir_tex_op_ms_resolve(struct nir_builder *b,
- nir_ssa_def *tex_pos,
+ nir_def *tex_pos,
enum glsl_base_type tex_type,
VkSampleCountFlagBits src_samples)
{
@@ -3419,10 +3662,10 @@ build_nir_tex_op_ms_resolve(struct nir_builder *b,
const bool is_int = glsl_base_type_is_integer(tex_type);
- nir_ssa_def *tmp = NULL;
- nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
+ nir_def *tmp = NULL;
+ nir_def *tex_deref = &nir_build_deref_var(b, sampler)->def;
for (uint32_t i = 0; i < src_samples; i++) {
- nir_ssa_def *s =
+ nir_def *s =
build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
tex_type, tex_pos,
nir_imm_int(b, i));
@@ -3437,13 +3680,13 @@ build_nir_tex_op_ms_resolve(struct nir_builder *b,
}
assert(!is_int);
- return nir_fmul(b, tmp, nir_imm_float(b, 1.0f / src_samples));
+ return nir_fmul_imm(b, tmp, 1.0f / src_samples);
}
/* Fetches the current sample (gl_SampleID) at the given position */
-static nir_ssa_def *
+static nir_def *
build_nir_tex_op_ms_read(struct nir_builder *b,
- nir_ssa_def *tex_pos,
+ nir_def *tex_pos,
enum glsl_base_type tex_type)
{
const struct glsl_type *sampler_type =
@@ -3453,17 +3696,17 @@ build_nir_tex_op_ms_read(struct nir_builder *b,
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
- nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
+ nir_def *tex_deref = &nir_build_deref_var(b, sampler)->def;
return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
tex_type, tex_pos,
nir_load_sample_id(b));
}
-static nir_ssa_def *
+static nir_def *
build_nir_tex_op(struct nir_builder *b,
struct v3dv_device *device,
- nir_ssa_def *tex_pos,
+ nir_def *tex_pos,
enum glsl_base_type tex_type,
VkSampleCountFlagBits dst_samples,
VkSampleCountFlagBits src_samples,
@@ -3507,10 +3750,10 @@ get_blit_vs()
vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;
vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;
- nir_ssa_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
+ nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
nir_store_var(&b, vs_out_pos, pos, 0xf);
- nir_ssa_def *tex_coord = gen_tex_coords(&b);
+ nir_def *tex_coord = gen_tex_coords(&b);
nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);
return b.shader;
@@ -3561,11 +3804,11 @@ get_color_blit_fs(struct v3dv_device *device,
nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
fs_out_color->data.location = FRAG_RESULT_DATA0;
- nir_ssa_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
+ nir_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
tex_coord = nir_channels(&b, tex_coord, channel_mask);
- nir_ssa_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
+ nir_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
dst_samples, src_samples, sampler_dim);
/* For integer textures, if the bit-size of the destination is too small to
@@ -3580,7 +3823,7 @@ get_color_blit_fs(struct v3dv_device *device,
enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);
- nir_ssa_def *c[4];
+ nir_def *c[4];
for (uint32_t i = 0; i < 4; i++) {
c[i] = nir_channel(&b, color, i);
@@ -3598,11 +3841,11 @@ get_color_blit_fs(struct v3dv_device *device,
assert(dst_bit_size > 0);
if (util_format_is_pure_uint(dst_pformat)) {
- nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
+ nir_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
c[i] = nir_umin(&b, c[i], max);
} else {
- nir_ssa_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
- nir_ssa_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
+ nir_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
+ nir_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
}
}
@@ -4062,12 +4305,10 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
/* We don't support rendering to linear depth/stencil, this should have
* been rewritten to a compatible color blit by the caller.
*/
- assert(dst->vk.tiling != VK_IMAGE_TILING_LINEAR ||
- !vk_format_is_depth_or_stencil(dst_format));
+ assert(dst->tiled || !vk_format_is_depth_or_stencil(dst_format));
/* Can't sample from linear images */
- if (src->vk.tiling == VK_IMAGE_TILING_LINEAR &&
- src->vk.image_type != VK_IMAGE_TYPE_1D) {
+ if (!src->tiled && src->vk.image_type != VK_IMAGE_TYPE_1D) {
return false;
}
@@ -4538,8 +4779,10 @@ resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
assert(dst->plane_count == 1);
assert(src->plane_count == 1);
- if (!v3dv_meta_can_use_tlb(src, 0, &region->srcOffset, NULL) ||
- !v3dv_meta_can_use_tlb(dst, 0, &region->dstOffset, NULL)) {
+ if (!v3dv_meta_can_use_tlb(src, 0, region->srcSubresource.mipLevel,
+ &region->srcOffset, NULL, NULL) ||
+ !v3dv_meta_can_use_tlb(dst, 0, region->dstSubresource.mipLevel,
+ &region->dstOffset, &region->extent, NULL)) {
return false;
}
@@ -4572,8 +4815,9 @@ resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
(fb_format, region->srcSubresource.aspectMask,
&internal_type, &internal_bpp);
- v3dv_job_start_frame(job, width, height, num_layers, false, true,
- 1, internal_bpp, true);
+ v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
+ internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
+ true);
struct v3dv_meta_framebuffer framebuffer;
v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_pass.c b/lib/mesa/src/broadcom/vulkan/v3dv_pass.c
index 683acde62..0583faf6f 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_pass.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_pass.c
@@ -234,13 +234,15 @@ v3dv_CreateRenderPass2(VkDevice _device,
.layout = desc->pDepthStencilAttachment->layout,
};
- /* GFXH-1461: if depth is cleared but stencil is loaded (or viceversa),
+ /* GFXH-1461: if depth is cleared but stencil is loaded (or vice versa),
* the clear might get lost. If a subpass has this then we can't emit
- * the clear using the TLB and we have to do it as a draw call.
+ * the clear using the TLB and we have to do it as a draw call. This
+ * issue is fixed since V3D 4.3.18.
*
* FIXME: separate stencil.
*/
- if (subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+ if (device->devinfo.ver == 42 &&
+ subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
struct v3dv_render_pass_attachment *att =
&pass->attachments[subpass->ds_attachment.attachment];
if (att->desc.format == VK_FORMAT_D24_UNORM_S8_UINT) {
@@ -320,11 +322,12 @@ subpass_get_granularity(struct v3dv_device *device,
/* Granularity is defined by the tile size */
assert(subpass_idx < pass->subpass_count);
struct v3dv_subpass *subpass = &pass->subpasses[subpass_idx];
- const uint32_t color_attachment_count = subpass->color_count;
+ const uint32_t color_count = subpass->color_count;
bool msaa = false;
- uint32_t max_bpp = 0;
- for (uint32_t i = 0; i < color_attachment_count; i++) {
+ uint32_t max_internal_bpp = 0;
+ uint32_t total_color_bpp = 0;
+ for (uint32_t i = 0; i < color_count; i++) {
uint32_t attachment_idx = subpass->color_attachments[i].attachment;
if (attachment_idx == VK_ATTACHMENT_UNUSED)
continue;
@@ -337,7 +340,8 @@ subpass_get_granularity(struct v3dv_device *device,
v3dv_X(device, get_internal_type_bpp_for_output_format)
(format->planes[0].rt_type, &internal_type, &internal_bpp);
- max_bpp = MAX2(max_bpp, internal_bpp);
+ max_internal_bpp = MAX2(max_internal_bpp, internal_bpp);
+ total_color_bpp += 4 * v3d_internal_bpp_words(internal_bpp);
if (desc->samples > VK_SAMPLE_COUNT_1_BIT)
msaa = true;
@@ -347,7 +351,8 @@ subpass_get_granularity(struct v3dv_device *device,
* heuristics so we choose a conservative granularity here, with it disabled.
*/
uint32_t width, height;
- v3d_choose_tile_size(color_attachment_count, max_bpp, msaa,
+ v3d_choose_tile_size(&device->devinfo, color_count,
+ max_internal_bpp, total_color_bpp, msaa,
false /* double-buffer */, &width, &height);
*granularity = (VkExtent2D) {
.width = width,
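Worked example for the accumulation above, using the bytes-per-pixel mapping sketched earlier: two color attachments with 32 bpp and 64 bpp internal formats give

   /* max_internal_bpp = V3D_INTERNAL_BPP_64 (the wider of the two)
    * total_color_bpp  = 4 * 1 + 4 * 2 = 12 bytes per pixel
    */

and v3d_choose_tile_size() picks the largest tile geometry whose color storage fits the tile buffer for that footprint.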
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_pipeline.c b/lib/mesa/src/broadcom/vulkan/v3dv_pipeline.c
index 116c0f70f..54a26cb14 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_pipeline.c
@@ -30,13 +30,14 @@
#include "qpu/qpu_disasm.h"
#include "compiler/nir/nir_builder.h"
-#include "nir/nir_vulkan.h"
#include "nir/nir_serialize.h"
#include "util/u_atomic.h"
#include "util/u_prim.h"
#include "util/os_time.h"
+#include "util/u_helpers.h"
+#include "vk_nir_convert_ycbcr.h"
#include "vk_pipeline.h"
#include "vulkan/util/vk_format.h"
@@ -192,8 +193,8 @@ const nir_shader_compiler_options v3dv_nir_options = {
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
- .lower_bitfield_insert_to_shifts = true,
- .lower_bitfield_extract_to_shifts = true,
+ .lower_bitfield_insert = true,
+ .lower_bitfield_extract = true,
.lower_bitfield_reverse = true,
.lower_bit_count = true,
.lower_cs_local_id_to_index = true,
@@ -226,10 +227,10 @@ const nir_shader_compiler_options v3dv_nir_options = {
.lower_isign = true,
.lower_ldexp = true,
.lower_mul_high = true,
- .lower_wpos_pntc = true,
- .lower_rotate = true,
+ .lower_wpos_pntc = false,
.lower_to_scalar = true,
.lower_device_index_to_zero = true,
+ .lower_fquantize2f16 = true,
.has_fsub = true,
.has_isub = true,
.vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
@@ -238,7 +239,7 @@ const nir_shader_compiler_options v3dv_nir_options = {
.max_unroll_iterations = 16,
.force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
.divergence_analysis_options =
- nir_divergence_multiple_workgroup_per_compute_subgroup
+ nir_divergence_multiple_workgroup_per_compute_subgroup,
};
const nir_shader_compiler_options *
@@ -546,7 +547,7 @@ lower_vulkan_resource_index(nir_builder *b,
uint32_t start_index = 0;
if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
- start_index = MAX_INLINE_UNIFORM_BUFFERS;
+ start_index += MAX_INLINE_UNIFORM_BUFFERS;
}
index = descriptor_map_add(descriptor_map, set, binding,
@@ -555,14 +556,6 @@ lower_vulkan_resource_index(nir_builder *b,
start_index,
32 /* return_size: doesn't really apply for this case */,
0);
-
- /* We always reserve index 0 for push constants */
- if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
- binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
- binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
- index++;
- }
-
break;
}
@@ -575,7 +568,7 @@ lower_vulkan_resource_index(nir_builder *b,
* vulkan_load_descriptor return a vec2 providing an index and
* offset. Our backend compiler only cares about the index part.
*/
- nir_ssa_def_rewrite_uses(&instr->dest.ssa,
+ nir_def_rewrite_uses(&instr->def,
nir_imm_ivec2(b, index, 0));
nir_instr_remove(&instr->instr);
}
@@ -601,7 +594,7 @@ lower_tex_src(nir_builder *b,
unsigned src_idx,
struct lower_pipeline_layout_state *state)
{
- nir_ssa_def *index = NULL;
+ nir_def *index = NULL;
unsigned base_index = 0;
unsigned array_elements = 1;
nir_tex_src *src = &instr->src[src_idx];
@@ -612,7 +605,6 @@ lower_tex_src(nir_builder *b,
/* We compute first the offsets */
nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
while (deref->deref_type != nir_deref_type_var) {
- assert(deref->parent.is_ssa);
nir_deref_instr *parent =
nir_instr_as_deref(deref->parent.ssa->parent_instr);
@@ -629,8 +621,8 @@ lower_tex_src(nir_builder *b,
}
index = nir_iadd(b, index,
- nir_imul(b, nir_imm_int(b, array_elements),
- nir_ssa_for_src(b, deref->arr.index, 1)));
+ nir_imul_imm(b, deref->arr.index.ssa,
+ array_elements));
}
array_elements *= glsl_get_length(parent->type);
@@ -645,8 +637,7 @@ lower_tex_src(nir_builder *b,
* instr if needed
*/
if (index) {
- nir_instr_rewrite_src(&instr->instr, &src->src,
- nir_src_for_ssa(index));
+ nir_src_rewrite(&src->src, index);
src->src_type = is_sampler ?
nir_tex_src_sampler_offset :
@@ -658,7 +649,7 @@ lower_tex_src(nir_builder *b,
uint32_t set = deref->var->data.descriptor_set;
uint32_t binding = deref->var->data.binding;
/* FIXME: this is a really simplified check for the precision to be used
- * for the sampling. Right now we are ony checking for the variables used
+ * for the sampling. Right now we are only checking for the variables used
* on the operation itself, but there are other cases that we could use to
* infer the precision requirement.
*/
@@ -720,18 +711,20 @@ lower_sampler(nir_builder *b,
int sampler_idx =
nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
- if (sampler_idx >= 0)
+ if (sampler_idx >= 0) {
+ assert(nir_tex_instr_need_sampler(instr));
lower_tex_src(b, instr, sampler_idx, state);
+ }
if (texture_idx < 0 && sampler_idx < 0)
return false;
- /* If we don't have a sampler, we assign it the idx we reserve for this
- * case, and we ensure that it is using the correct return size.
+   /* If the instruction doesn't have a sampler (e.g. txf) we use backend_flags
+    * to bind a default sampler state to configure precision.
*/
if (sampler_idx < 0) {
state->needs_default_sampler_state = true;
- instr->sampler_index = return_size == 16 ?
+ instr->backend_flags = return_size == 16 ?
V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
}
@@ -745,12 +738,11 @@ lower_image_deref(nir_builder *b,
struct lower_pipeline_layout_state *state)
{
nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
- nir_ssa_def *index = NULL;
+ nir_def *index = NULL;
unsigned array_elements = 1;
unsigned base_index = 0;
while (deref->deref_type != nir_deref_type_var) {
- assert(deref->parent.is_ssa);
nir_deref_instr *parent =
nir_instr_as_deref(deref->parent.ssa->parent_instr);
@@ -767,8 +759,8 @@ lower_image_deref(nir_builder *b,
}
index = nir_iadd(b, index,
- nir_imul(b, nir_imm_int(b, array_elements),
- nir_ssa_for_src(b, deref->arr.index, 1)));
+ nir_imul_imm(b, deref->arr.index.ssa,
+ array_elements));
}
array_elements *= glsl_get_length(parent->type);
@@ -833,23 +825,15 @@ lower_intrinsic(nir_builder *b,
/* Loading the descriptor happens as part of load/store instructions,
* so for us this is a no-op.
*/
- nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
+ nir_def_rewrite_uses(&instr->def, instr->src[0].ssa);
nir_instr_remove(&instr->instr);
return true;
}
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_store:
- case nir_intrinsic_image_deref_atomic_add:
- case nir_intrinsic_image_deref_atomic_imin:
- case nir_intrinsic_image_deref_atomic_umin:
- case nir_intrinsic_image_deref_atomic_imax:
- case nir_intrinsic_image_deref_atomic_umax:
- case nir_intrinsic_image_deref_atomic_and:
- case nir_intrinsic_image_deref_atomic_or:
- case nir_intrinsic_image_deref_atomic_xor:
- case nir_intrinsic_image_deref_atomic_exchange:
- case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_deref_atomic:
+ case nir_intrinsic_image_deref_atomic_swap:
case nir_intrinsic_image_deref_size:
case nir_intrinsic_image_deref_samples:
lower_image_deref(b, instr, state);
@@ -907,6 +891,34 @@ lower_pipeline_layout_info(nir_shader *shader,
return progress;
}
+/* This flips gl_PointCoord.y to match Vulkan requirements */
+static bool
+lower_point_coord_cb(nir_builder *b, nir_intrinsic_instr *intr, void *_state)
+{
+ if (intr->intrinsic != nir_intrinsic_load_input)
+ return false;
+
+ if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PNTC)
+ return false;
+
+ b->cursor = nir_after_instr(&intr->instr);
+ nir_def *result = &intr->def;
+ result =
+ nir_vector_insert_imm(b, result,
+ nir_fsub_imm(b, 1.0, nir_channel(b, result, 1)), 1);
+ nir_def_rewrite_uses_after(&intr->def,
+ result, result->parent_instr);
+ return true;
+}
+
+static bool
+v3d_nir_lower_point_coord(nir_shader *s)
+{
+ assert(s->info.stage == MESA_SHADER_FRAGMENT);
+ return nir_shader_intrinsics_pass(s, lower_point_coord_cb,
+ nir_metadata_block_index |
+ nir_metadata_dominance, NULL);
+}
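
For intuition, what this pass computes for the Y channel is just the scalar flip below; a minimal standalone sketch in plain C, not the actual NIR rewrite:

#include <stdio.h>

/* Vulkan mandates an upper-left origin for gl_PointCoord, so the pass
 * rewrites the Y channel as 1.0 - y. */
static float flip_point_coord_y(float y)
{
   return 1.0f - y;
}

int main(void)
{
   printf("%.2f\n", flip_point_coord_y(0.25f)); /* prints 0.75 */
   return 0;
}
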
static void
lower_fs_io(nir_shader *nir)
@@ -1043,24 +1055,22 @@ pipeline_populate_v3d_key(struct v3d_key *key,
p_stage->robustness.storage_buffers == robust_buffer_enabled;
key->robust_image_access =
p_stage->robustness.images == robust_image_enabled;
-
- key->environment = V3D_ENVIRONMENT_VULKAN;
}
/* FIXME: anv maps to hw primitive type. Perhaps eventually we would do the
* same. For now we use prim_mode, which is the one already used on v3d
*/
-static const enum pipe_prim_type vk_to_pipe_prim_type[] = {
- [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,
- [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,
- [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,
- [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,
- [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,
- [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,
- [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,
- [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,
- [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,
- [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
+static const enum mesa_prim vk_to_mesa_prim[] = {
+ [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = MESA_PRIM_POINTS,
+ [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = MESA_PRIM_LINES,
+ [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = MESA_PRIM_LINE_STRIP,
+ [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = MESA_PRIM_TRIANGLES,
+ [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = MESA_PRIM_TRIANGLE_STRIP,
+ [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = MESA_PRIM_TRIANGLE_FAN,
+ [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = MESA_PRIM_LINES_ADJACENCY,
+ [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = MESA_PRIM_LINE_STRIP_ADJACENCY,
+ [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = MESA_PRIM_TRIANGLES_ADJACENCY,
+ [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = MESA_PRIM_TRIANGLE_STRIP_ADJACENCY,
};
static const enum pipe_logicop vk_to_pipe_logicop[] = {
@@ -1100,11 +1110,22 @@ pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
const VkPipelineInputAssemblyStateCreateInfo *ia_info =
pCreateInfo->pInputAssemblyState;
- uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
+ uint8_t topology = vk_to_mesa_prim[ia_info->topology];
+
+ key->is_points = (topology == MESA_PRIM_POINTS);
+ key->is_lines = (topology >= MESA_PRIM_LINES &&
+ topology <= MESA_PRIM_LINE_STRIP);
+
+ if (key->is_points) {
+ /* This mask represents state for GL_ARB_point_sprite which is not
+ * relevant to Vulkan.
+ */
+ key->point_sprite_mask = 0;
+
+ /* Vulkan mandates upper left. */
+ key->point_coord_upper_left = true;
+ }
- key->is_points = (topology == PIPE_PRIM_POINTS);
- key->is_lines = (topology >= PIPE_PRIM_LINES &&
- topology <= PIPE_PRIM_LINE_STRIP);
key->has_gs = has_geometry_shader;
const VkPipelineColorBlendStateCreateInfo *cb_info =
@@ -1181,16 +1202,6 @@ pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
else if (util_format_is_pure_sint(fb_pipe_format))
key->int_color_rb |= 1 << i;
}
-
- if (key->is_points) {
- /* This mask represents state for GL_ARB_point_sprite which is not
- * relevant to Vulkan.
- */
- key->point_sprite_mask = 0;
-
- /* Vulkan mandates upper left. */
- key->point_coord_upper_left = true;
- }
}
}
@@ -1271,11 +1282,11 @@ pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
*/
const VkPipelineInputAssemblyStateCreateInfo *ia_info =
pCreateInfo->pInputAssemblyState;
- uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
+ uint8_t topology = vk_to_mesa_prim[ia_info->topology];
/* FIXME: PRIM_POINTS is not enough, in gallium the full check is
- * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
- key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);
+ * MESA_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
+ key->per_vertex_point_size = (topology == MESA_PRIM_POINTS);
key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
@@ -1340,8 +1351,10 @@ pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
const VkVertexInputAttributeDescription *desc =
&vi_info->pVertexAttributeDescriptions[i];
assert(desc->location < MAX_VERTEX_ATTRIBS);
- if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
+ if (desc->format == VK_FORMAT_B8G8R8A8_UNORM ||
+ desc->format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) {
key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
+ }
}
}
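
For intuition, the mask marks attributes whose red and blue channels must be swapped because the hardware fetches them as RGBA; a standalone sketch of the fixup for a packed 8-bit BGRA value (illustrative only, not the driver's code path):

#include <stdint.h>
#include <stdio.h>

/* Swap byte 0 (B) and byte 2 (R) of a packed B8G8R8A8 value. */
static uint32_t swap_rb(uint32_t bgra)
{
   uint32_t b = bgra & 0xff;
   uint32_t r = (bgra >> 16) & 0xff;
   return (bgra & 0xff00ff00u) | (b << 16) | r;
}

int main(void)
{
   printf("0x%08x\n", swap_rb(0x11223344u)); /* prints 0x11443322 */
   return 0;
}
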
@@ -1790,7 +1803,7 @@ pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
if (nir) {
assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
- /* A NIR cach hit doesn't avoid the large majority of pipeline stage
+ /* A NIR cache hit doesn't avoid the large majority of pipeline stage
* creation so the cache hit is not recorded in the pipeline feedback
* flags
*/
@@ -1910,6 +1923,11 @@ pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
p_stage_gs != NULL,
get_ucp_enable_mask(p_stage_vs));
+ if (key.is_points) {
+ assert(key.point_coord_upper_left);
+ NIR_PASS(_, p_stage_fs->nir, v3d_nir_lower_point_coord);
+ }
+
VkResult vk_result;
pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
pipeline_compile_shader_variant(p_stage_fs, &key.base, sizeof(key),
@@ -1933,7 +1951,7 @@ pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
const VkPipelineInputAssemblyStateCreateInfo *ia_info =
pCreateInfo->pInputAssemblyState;
- key->topology = vk_to_pipe_prim_type[ia_info->topology];
+ key->topology = vk_to_mesa_prim[ia_info->topology];
const VkPipelineColorBlendStateCreateInfo *cb_info =
raster_enabled ? pCreateInfo->pColorBlendState : NULL;
@@ -1998,8 +2016,10 @@ pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
const VkVertexInputAttributeDescription *desc =
&vi_info->pVertexAttributeDescriptions[i];
assert(desc->location < MAX_VERTEX_ATTRIBS);
- if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
+ if (desc->format == VK_FORMAT_B8G8R8A8_UNORM ||
+ desc->format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) {
key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
+ }
}
assert(pipeline->subpass);
@@ -2130,19 +2150,19 @@ write_creation_feedback(struct v3dv_pipeline *pipeline,
}
}
-static enum shader_prim
+static enum mesa_prim
multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
{
switch (pipeline->topology) {
- case PIPE_PRIM_POINTS:
- return SHADER_PRIM_POINTS;
- case PIPE_PRIM_LINES:
- case PIPE_PRIM_LINE_STRIP:
- return SHADER_PRIM_LINES;
- case PIPE_PRIM_TRIANGLES:
- case PIPE_PRIM_TRIANGLE_STRIP:
- case PIPE_PRIM_TRIANGLE_FAN:
- return SHADER_PRIM_TRIANGLES;
+ case MESA_PRIM_POINTS:
+ return MESA_PRIM_POINTS;
+ case MESA_PRIM_LINES:
+ case MESA_PRIM_LINE_STRIP:
+ return MESA_PRIM_LINES;
+ case MESA_PRIM_TRIANGLES:
+ case MESA_PRIM_TRIANGLE_STRIP:
+ case MESA_PRIM_TRIANGLE_FAN:
+ return MESA_PRIM_TRIANGLES;
default:
/* Since we don't allow GS with multiview, we can only see non-adjacency
* primitives.
@@ -2151,19 +2171,19 @@ multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
}
}
-static enum shader_prim
+static enum mesa_prim
multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
{
switch (pipeline->topology) {
- case PIPE_PRIM_POINTS:
- return SHADER_PRIM_POINTS;
- case PIPE_PRIM_LINES:
- case PIPE_PRIM_LINE_STRIP:
- return SHADER_PRIM_LINE_STRIP;
- case PIPE_PRIM_TRIANGLES:
- case PIPE_PRIM_TRIANGLE_STRIP:
- case PIPE_PRIM_TRIANGLE_FAN:
- return SHADER_PRIM_TRIANGLE_STRIP;
+ case MESA_PRIM_POINTS:
+ return MESA_PRIM_POINTS;
+ case MESA_PRIM_LINES:
+ case MESA_PRIM_LINE_STRIP:
+ return MESA_PRIM_LINE_STRIP;
+ case MESA_PRIM_TRIANGLES:
+ case MESA_PRIM_TRIANGLE_STRIP:
+ case MESA_PRIM_TRIANGLE_FAN:
+ return MESA_PRIM_TRIANGLE_STRIP;
default:
/* Since we don't allow GS with multiview, we can only see non-adjacency
* primitives.
@@ -2232,7 +2252,7 @@ pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
out_layer->data.location = VARYING_SLOT_LAYER;
/* Get the view index value that we will write to gl_Layer */
- nir_ssa_def *layer =
+ nir_def *layer =
nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
/* Emit all output vertices */
@@ -2612,13 +2632,8 @@ v3dv_dynamic_state_mask(VkDynamicState state)
return V3DV_DYNAMIC_LINE_WIDTH;
case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
return V3DV_DYNAMIC_COLOR_WRITE_ENABLE;
-
- /* Depth bounds testing is not available in in V3D 4.2 so here we are just
- * ignoring this dynamic state. We are already asserting at pipeline creation
- * time that depth bounds testing is not enabled.
- */
case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
- return 0;
+ return V3DV_DYNAMIC_DEPTH_BOUNDS;
default:
unreachable("Unhandled dynamic state");
@@ -2636,6 +2651,7 @@ pipeline_init_dynamic_state(
const VkPipelineColorWriteCreateInfoEXT *pColorWriteState)
{
/* Initialize to default values */
+ const struct v3d_device_info *devinfo = &pipeline->device->devinfo;
struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
memset(dynamic, 0, sizeof(*dynamic));
dynamic->stencil_compare_mask.front = ~0;
@@ -2643,7 +2659,9 @@ pipeline_init_dynamic_state(
dynamic->stencil_write_mask.front = ~0;
dynamic->stencil_write_mask.back = ~0;
dynamic->line_width = 1.0f;
- dynamic->color_write_enable = (1ull << (4 * V3D_MAX_DRAW_BUFFERS)) - 1;
+ dynamic->color_write_enable =
+ (1ull << (4 * V3D_MAX_RENDER_TARGETS(devinfo->ver))) - 1;
+ dynamic->depth_bounds.max = 1.0f;
/* Create a mask of enabled dynamic states */
uint32_t dynamic_states = 0;
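
A standalone sketch of the default color-write mask computed above; the per-version render target counts are assumptions for illustration, the driver takes them from V3D_MAX_RENDER_TARGETS:

#include <stdint.h>
#include <stdio.h>

/* Assumed counts: 4 render targets on v42, 8 on v71. */
static unsigned max_render_targets(int ver)
{
   return ver >= 71 ? 8 : 4;
}

int main(void)
{
   /* One enable bit per color channel, 4 channels per render target. */
   uint64_t mask42 = (1ull << (4 * max_render_targets(42))) - 1;
   uint64_t mask71 = (1ull << (4 * max_render_targets(71))) - 1;
   printf("0x%llx 0x%llx\n",
          (unsigned long long)mask42,  /* 0xffff */
          (unsigned long long)mask71); /* 0xffffffff */
   return 0;
}
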
@@ -2665,9 +2683,10 @@ pipeline_init_dynamic_state(
pViewportState->viewportCount);
for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
- v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i],
- dynamic->viewport.scale[i],
- dynamic->viewport.translate[i]);
+ v3dv_X(pipeline->device, viewport_compute_xform)
+ (&dynamic->viewport.viewports[i],
+ dynamic->viewport.scale[i],
+ dynamic->viewport.translate[i]);
}
}
@@ -2695,6 +2714,11 @@ pipeline_init_dynamic_state(
dynamic->stencil_reference.front = pDepthStencilState->front.reference;
dynamic->stencil_reference.back = pDepthStencilState->back.reference;
}
+
+ if (!(dynamic_states & V3DV_DYNAMIC_DEPTH_BOUNDS)) {
+ dynamic->depth_bounds.min = pDepthStencilState->minDepthBounds;
+ dynamic->depth_bounds.max = pDepthStencilState->maxDepthBounds;
+ }
}
if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
@@ -2806,62 +2830,6 @@ pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
}
}
-static bool
-pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
-{
- for (uint8_t i = 0; i < pipeline->va_count; i++) {
- if (vk_format_is_int(pipeline->va[i].vk_format))
- return true;
- }
- return false;
-}
-
-/* @pipeline can be NULL. We assume in that case that all the attributes have
- * a float format (we only create an all-float BO once and we reuse it with
- * all float pipelines), otherwise we look at the actual type of each
- * attribute used with the specific pipeline passed in.
- */
-struct v3dv_bo *
-v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
- struct v3dv_pipeline *pipeline)
-{
- uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
- struct v3dv_bo *bo;
-
- bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
-
- if (!bo) {
- fprintf(stderr, "failed to allocate memory for the default "
- "attribute values\n");
- return NULL;
- }
-
- bool ok = v3dv_bo_map(device, bo, size);
- if (!ok) {
- fprintf(stderr, "failed to map default attribute values buffer\n");
- return false;
- }
-
- uint32_t *attrs = bo->map;
- uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
- for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
- attrs[i * 4 + 0] = 0;
- attrs[i * 4 + 1] = 0;
- attrs[i * 4 + 2] = 0;
- VkFormat attr_format =
- pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
- if (i < va_count && vk_format_is_int(attr_format)) {
- attrs[i * 4 + 3] = 1;
- } else {
- attrs[i * 4 + 3] = fui(1.0);
- }
- }
-
- v3dv_bo_unmap(device, bo);
-
- return bo;
-}
-
static void
pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
const VkPipelineMultisampleStateCreateInfo *ms_info)
@@ -2909,7 +2877,7 @@ pipeline_init(struct v3dv_pipeline *pipeline,
const VkPipelineInputAssemblyStateCreateInfo *ia_info =
pCreateInfo->pInputAssemblyState;
- pipeline->topology = vk_to_pipe_prim_type[ia_info->topology];
+ pipeline->topology = vk_to_mesa_prim[ia_info->topology];
/* If rasterization is not enabled, various CreateInfo structs must be
* ignored.
@@ -2964,7 +2932,9 @@ pipeline_init(struct v3dv_pipeline *pipeline,
/* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
* feature and it shouldn't be used by any pipeline.
*/
- assert(!ds_info || !ds_info->depthBoundsTestEnable);
+ assert(device->devinfo.ver >= 71 ||
+ !ds_info || !ds_info->depthBoundsTestEnable);
+ pipeline->depth_bounds_test_enabled = ds_info && ds_info->depthBoundsTestEnable;
enable_depth_bias(pipeline, rs_info);
@@ -2996,9 +2966,10 @@ pipeline_init(struct v3dv_pipeline *pipeline,
v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
- if (pipeline_has_integer_vertex_attrib(pipeline)) {
+ if (v3dv_X(device, pipeline_needs_default_attribute_values)(pipeline)) {
pipeline->default_attribute_values =
- v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline);
+ v3dv_X(pipeline->device, create_default_attribute_values)(pipeline->device, pipeline);
+
if (!pipeline->default_attribute_values)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
} else {
@@ -3106,14 +3077,20 @@ shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
}
static void
-lower_cs_shared(struct nir_shader *nir)
+lower_compute(struct nir_shader *nir)
{
if (!nir->info.shared_memory_explicit_layout) {
NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
nir_var_mem_shared, shared_type_info);
}
+
NIR_PASS(_, nir, nir_lower_explicit_io,
nir_var_mem_shared, nir_address_format_32bit_offset);
+
+ struct nir_lower_compute_system_values_options sysval_options = {
+ .has_base_workgroup_id = true,
+ };
+ NIR_PASS_V(nir, nir_lower_compute_system_values, &sysval_options);
}
static VkResult
@@ -3201,7 +3178,7 @@ pipeline_compile_compute(struct v3dv_pipeline *pipeline,
v3d_optimize_nir(NULL, p_stage->nir);
pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
- lower_cs_shared(p_stage->nir);
+ lower_compute(p_stage->nir);
VkResult result = VK_SUCCESS;
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_pipeline_cache.c b/lib/mesa/src/broadcom/vulkan/v3dv_pipeline_cache.c
index bafa8d759..3f58940c7 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_pipeline_cache.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_pipeline_cache.c
@@ -542,7 +542,7 @@ shader_variant_create_from_blob(struct v3dv_device *device,
if (blob->overrun)
return NULL;
- uint ulist_data_size = sizeof(uint32_t) * ulist_count;
+ size_t ulist_data_size = sizeof(uint32_t) * ulist_count;
const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);
if (blob->overrun)
return NULL;
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_private.h b/lib/mesa/src/broadcom/vulkan/v3dv_private.h
index 91c1ec2f6..21934d802 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_private.h
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_private.h
@@ -38,6 +38,7 @@
#include "vk_descriptor_update_template.h"
#include "vk_device.h"
+#include "vk_device_memory.h"
#include "vk_format.h"
#include "vk_instance.h"
#include "vk_image.h"
@@ -64,6 +65,11 @@
#define VG(x) ((void)0)
#endif
+#ifdef ANDROID
+#include <vndk/hardware_buffer.h>
+#include "util/u_gralloc/u_gralloc.h"
+#endif
+
#include "v3dv_limits.h"
#include "common/v3d_device_info.h"
@@ -123,13 +129,15 @@ struct v3d_simulator_file;
/* Minimum required by the Vulkan 1.1 spec */
#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)
+/* Maximum number of performance counters */
+#define V3D_MAX_PERFCNT 93
+
struct v3dv_physical_device {
struct vk_physical_device vk;
char *name;
int32_t render_fd;
int32_t display_fd;
- int32_t master_fd;
/* We need these because it is not clear how to detect
* valid devids in a portable way
@@ -168,7 +176,7 @@ struct v3dv_physical_device {
const struct v3d_compiler *compiler;
uint32_t next_program_id;
- uint64_t heap_used;
+ alignas(8) uint64_t heap_used;
/* This array holds all our 'struct v3dv_bo' allocations. We use this
* so we can add a refcount to our BOs and check if a particular BO
@@ -197,9 +205,6 @@ struct v3dv_physical_device {
} caps;
};
-VkResult v3dv_physical_device_acquire_display(struct v3dv_physical_device *pdevice,
- VkIcdSurfaceBase *surface);
-
static inline struct v3dv_bo *
v3dv_device_lookup_bo(struct v3dv_physical_device *device, uint32_t handle)
{
@@ -222,7 +227,9 @@ void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);
bool v3dv_meta_can_use_tlb(struct v3dv_image *image,
uint8_t plane,
+ uint8_t miplevel,
const VkOffset3D *offset,
+ const VkExtent3D *extent,
VkFormat *compat_format);
struct v3dv_instance {
@@ -579,6 +586,9 @@ struct v3dv_device {
* being float being float, allowing us to reuse the same BO for all
* pipelines matching this requirement. Pipelines that need integer
* attributes will create their own BO.
+ *
+ * Note that since v71 the default attribute values are not needed, so this
+ * can be NULL.
*/
struct v3dv_bo *default_attribute_float;
@@ -586,17 +596,12 @@ struct v3dv_device {
struct util_dynarray device_address_bo_list; /* Array of struct v3dv_bo * */
#ifdef ANDROID
- const void *gralloc;
- enum {
- V3DV_GRALLOC_UNKNOWN,
- V3DV_GRALLOC_CROS,
- V3DV_GRALLOC_OTHER,
- } gralloc_type;
+ struct u_gralloc *gralloc;
#endif
};
struct v3dv_device_memory {
- struct vk_object_base base;
+ struct vk_device_memory vk;
struct v3dv_bo *bo;
const VkMemoryType *type;
@@ -670,6 +675,8 @@ struct v3d_resource_slice {
uint32_t offset;
uint32_t stride;
uint32_t padded_height;
+ uint32_t width;
+ uint32_t height;
/* Size of a single pane of the slice. For 3D textures, there will be
* a number of panes equal to the minified, power-of-two-aligned
* depth.
@@ -724,9 +731,18 @@ struct v3dv_image {
VkFormat vk_format;
} planes[V3DV_MAX_PLANE_COUNT];
+ /* Used only when sampling a linear texture (which V3D doesn't support).
+ * This holds a tiled copy of the image we can use for that purpose.
+ */
+ struct v3dv_image *shadow;
+
#ifdef ANDROID
/* Image is backed by VK_ANDROID_native_buffer */
bool is_native_buffer_memory;
+ /* Image is backed by VK_ANDROID_external_memory_android_hardware_buffer */
+ bool is_ahb;
+ VkImageDrmFormatModifierExplicitCreateInfoEXT *android_explicit_layout;
+ VkSubresourceLayout *android_plane_layouts;
#endif
};
@@ -768,6 +784,8 @@ struct v3dv_image_view {
const struct v3dv_format *format;
+ uint8_t view_swizzle[4];
+
uint8_t plane_count;
struct {
uint8_t image_plane;
@@ -778,8 +796,8 @@ struct v3dv_image_view {
uint32_t internal_type;
uint32_t offset;
- /* Precomputed (composed from createinfo->components and formar swizzle)
- * swizzles to pass in to the shader key.
+ /* Precomputed swizzle (composed from the view swizzle and the format
+ * swizzle).
*
* This could be also included on the descriptor bo, but the shader state
* packet doesn't need it on a bo, so we can just avoid a memory copy
@@ -796,6 +814,11 @@ struct v3dv_image_view {
*/
uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
} planes[V3DV_MAX_PLANE_COUNT];
+
+ /* Used only when sampling a linear texture (which V3D doesn't support).
+ * This would represent a view over the tiled shadow image.
+ */
+ struct v3dv_image_view *shadow;
};
VkResult v3dv_create_image_view(struct v3dv_device *device,
@@ -916,7 +939,7 @@ struct v3dv_framebuffer {
uint32_t layers;
/* Typically, edge tiles in the framebuffer have padding depending on the
- * underlying tiling layout. One consequnce of this is that when the
+ * underlying tiling layout. One consequence of this is that when the
* framebuffer dimensions are not aligned to tile boundaries, tile stores
* would still write full tiles on the edges and write to the padded area.
* If the framebuffer is aliasing a smaller region of a larger image, then
@@ -942,6 +965,7 @@ struct v3dv_frame_tiling {
uint32_t layers;
uint32_t render_target_count;
uint32_t internal_bpp;
+ uint32_t total_color_bpp;
bool msaa;
bool double_buffer;
uint32_t tile_width;
@@ -1036,7 +1060,8 @@ enum v3dv_dynamic_state_bits {
V3DV_DYNAMIC_DEPTH_BIAS = 1 << 6,
V3DV_DYNAMIC_LINE_WIDTH = 1 << 7,
V3DV_DYNAMIC_COLOR_WRITE_ENABLE = 1 << 8,
- V3DV_DYNAMIC_ALL = (1 << 9) - 1,
+ V3DV_DYNAMIC_DEPTH_BOUNDS = 1 << 9,
+ V3DV_DYNAMIC_ALL = (1 << 10) - 1,
};
/* Flags for dirty pipeline state.
@@ -1061,6 +1086,7 @@ enum v3dv_cmd_dirty_bits {
V3DV_CMD_DIRTY_LINE_WIDTH = 1 << 16,
V3DV_CMD_DIRTY_VIEW_INDEX = 1 << 17,
V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE = 1 << 18,
+ V3DV_CMD_DIRTY_DEPTH_BOUNDS = 1 << 19,
};
struct v3dv_dynamic_state {
@@ -1097,6 +1123,11 @@ struct v3dv_dynamic_state {
float slope_factor;
} depth_bias;
+ struct {
+ float min;
+ float max;
+ } depth_bounds;
+
float line_width;
uint32_t color_write_enable;
@@ -1121,7 +1152,6 @@ enum v3dv_job_type {
V3DV_JOB_TYPE_CPU_RESET_QUERIES,
V3DV_JOB_TYPE_CPU_END_QUERY,
V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
- V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
};
@@ -1160,20 +1190,6 @@ struct v3dv_submit_sync_info {
struct vk_sync_signal *signals;
};
-struct v3dv_copy_buffer_to_image_cpu_job_info {
- struct v3dv_image *image;
- struct v3dv_buffer *buffer;
- uint32_t buffer_offset;
- uint32_t buffer_stride;
- uint32_t buffer_layer_stride;
- VkOffset3D image_offset;
- VkExtent3D image_extent;
- uint32_t mip_level;
- uint32_t base_layer;
- uint32_t layer_count;
- uint8_t plane;
-};
-
struct v3dv_csd_indirect_cpu_job_info {
struct v3dv_buffer *buffer;
uint32_t offset;
@@ -1192,7 +1208,7 @@ struct v3dv_timestamp_query_cpu_job_info {
};
/* Number of perfmons required to handle all supported performance counters */
-#define V3DV_MAX_PERFMONS DIV_ROUND_UP(V3D_PERFCNT_NUM, \
+#define V3DV_MAX_PERFMONS DIV_ROUND_UP(V3D_MAX_PERFCNT, \
DRM_V3D_MAX_PERF_COUNTERS)
struct v3dv_perf_query {
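
The resulting perfmon count is plain ceiling division; a standalone sketch, where the kernel limit DRM_V3D_MAX_PERF_COUNTERS is assumed to be 32:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define MAX_PERFCNT 93     /* V3D_MAX_PERFCNT above */
#define MAX_PER_PERFMON 32 /* assumed DRM_V3D_MAX_PERF_COUNTERS */

int main(void)
{
   /* 93 counters split across 32-counter perfmons -> 3 perfmons. */
   printf("%d\n", DIV_ROUND_UP(MAX_PERFCNT, MAX_PER_PERFMON));
   return 0;
}
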
@@ -1327,7 +1343,6 @@ struct v3dv_job {
struct v3dv_reset_query_cpu_job_info query_reset;
struct v3dv_end_query_info query_end;
struct v3dv_copy_query_results_cpu_job_info query_copy_results;
- struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
struct v3dv_csd_indirect_cpu_job_info csd_indirect;
struct v3dv_timestamp_query_cpu_job_info query_timestamp;
} cpu;
@@ -1365,6 +1380,7 @@ void v3dv_job_start_frame(struct v3dv_job *job,
bool allocate_tile_state_now,
uint32_t render_target_count,
uint8_t max_internal_bpp,
+ uint8_t total_color_bpp,
bool msaa);
bool v3dv_job_type_is_gpu(struct v3dv_job *job);
@@ -1482,7 +1498,7 @@ struct v3dv_cmd_buffer_state {
/* FIXME: we have just one client-side BO for the push constants,
* independently of the stageFlags in vkCmdPushConstants, and the
* pipelineBindPoint in vkCmdBindPipeline. We could probably do more stage
- * tunning in the future if it makes sense.
+ * tuning in the future if it makes sense.
*/
uint32_t push_constants_size;
uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
@@ -1663,7 +1679,7 @@ struct v3dv_query_pool {
/* Only used with performance queries */
struct {
uint32_t ncounters;
- uint8_t counters[V3D_PERFCNT_NUM];
+ uint8_t counters[V3D_MAX_PERFCNT];
/* V3D has a limit on the number of counters we can track in a
* single performance monitor, so if too many counters are requested
@@ -1799,7 +1815,8 @@ void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
struct drm_v3d_submit_tfu *tfu);
-void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info,
+void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_device *device,
+ struct v3dv_csd_indirect_cpu_job_info *info,
const uint32_t *wg_counts);
void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
@@ -1827,6 +1844,11 @@ bool v3dv_cmd_buffer_check_needs_store(const struct v3dv_cmd_buffer_state *state
void v3dv_cmd_buffer_emit_pipeline_barrier(struct v3dv_cmd_buffer *cmd_buffer,
const VkDependencyInfoKHR *info);
+bool v3dv_cmd_buffer_copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_image *dst,
+ struct v3dv_image *src,
+ const VkImageCopy2 *region);
+
struct v3dv_event {
struct vk_object_base base;
@@ -2156,32 +2178,6 @@ struct v3dv_sampler {
#define V3DV_NO_SAMPLER_16BIT_IDX 0
#define V3DV_NO_SAMPLER_32BIT_IDX 1
-/*
- * Following two methods are using on the combined to/from texture/sampler
- * indices maps at v3dv_pipeline.
- */
-static inline uint32_t
-v3dv_pipeline_combined_index_key_create(uint32_t texture_index,
- uint32_t sampler_index)
-{
- return texture_index << 24 | sampler_index;
-}
-
-static inline void
-v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key,
- uint32_t *texture_index,
- uint32_t *sampler_index)
-{
- uint32_t texture = combined_index_key >> 24;
- uint32_t sampler = combined_index_key & 0xffffff;
-
- if (texture_index)
- *texture_index = texture;
-
- if (sampler_index)
- *sampler_index = sampler;
-}
-
struct v3dv_descriptor_maps {
struct v3dv_descriptor_map ubo_map;
struct v3dv_descriptor_map ssbo_map;
@@ -2277,7 +2273,7 @@ struct v3dv_pipeline {
} va[MAX_VERTEX_ATTRIBS];
uint32_t va_count;
- enum pipe_prim_type topology;
+ enum mesa_prim topology;
struct v3dv_pipeline_shared_data *shared_data;
@@ -2285,7 +2281,8 @@ struct v3dv_pipeline {
unsigned char sha1[20];
/* In general we can reuse v3dv_device->default_attribute_float, so note
- * that the following can be NULL.
+ * that the following can be NULL. In 7.x this is not used, so it will
+ * always be NULL.
*
* FIXME: the content of this BO will be small, so it could be improved to
* be uploaded to a common BO. But as in most cases it will be NULL, it is
@@ -2319,6 +2316,9 @@ struct v3dv_pipeline {
bool is_z16;
} depth_bias;
+ /* Depth bounds */
+ bool depth_bounds_test_enabled;
+
struct {
void *mem_ctx;
struct util_dynarray data; /* Array of v3dv_pipeline_executable_data */
@@ -2334,6 +2334,13 @@ struct v3dv_pipeline {
uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
};
+static inline bool
+v3dv_texture_shader_state_has_rb_swap_reverse_bits(const struct v3dv_device *device)
+{
+ return device->devinfo.ver > 71 ||
+ (device->devinfo.ver == 71 && device->devinfo.rev >= 5);
+}
+
static inline VkPipelineBindPoint
v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
{
@@ -2496,10 +2503,6 @@ void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
struct v3dv_pipeline_cache *cache);
-struct v3dv_bo *
-v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
- struct v3dv_pipeline *pipeline);
-
VkResult
v3dv_create_compute_pipeline_from_nir(struct v3dv_device *device,
nir_shader *nir,
@@ -2522,7 +2525,7 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
VK_OBJECT_TYPE_BUFFER_VIEW)
-VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, base, VkDeviceMemory,
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, vk.base, VkDeviceMemory,
VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
VK_OBJECT_TYPE_DESCRIPTOR_POOL)
@@ -2604,12 +2607,32 @@ u64_compare(const void *key1, const void *key2)
case 42: \
v3d_X_thing = &v3d42_##thing; \
break; \
+ case 71: \
+ v3d_X_thing = &v3d71_##thing; \
+ break; \
default: \
unreachable("Unsupported hardware generation"); \
} \
v3d_X_thing; \
})
+/* Helper to get hw-specific macro values */
+#define V3DV_X(device, thing) ({ \
+ __typeof(V3D42_##thing) V3D_X_THING; \
+ switch (device->devinfo.ver) { \
+ case 42: \
+ V3D_X_THING = V3D42_##thing; \
+ break; \
+ case 71: \
+ V3D_X_THING = V3D71_##thing; \
+ break; \
+ default: \
+ unreachable("Unsupported hardware generation"); \
+ } \
+ V3D_X_THING; \
+})
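
A hypothetical, self-contained illustration of how this macro dispatches on the device version; the real V3D42_/V3D71_ values come from the generated packet headers, and the granularity numbers below are placeholders:

#include <stdio.h>

#define V3D42_CLIPPER_XY_GRANULARITY 256.0f /* placeholder value */
#define V3D71_CLIPPER_XY_GRANULARITY 64.0f  /* placeholder value */

struct fake_device { struct { int ver; } devinfo; };

#define FAKE_V3DV_X(device, thing) ({                \
   __typeof(V3D42_##thing) V3D_X_THING;              \
   switch ((device)->devinfo.ver) {                  \
   case 42: V3D_X_THING = V3D42_##thing; break;      \
   case 71: V3D_X_THING = V3D71_##thing; break;      \
   default: V3D_X_THING = V3D42_##thing; break;      \
   }                                                 \
   V3D_X_THING;                                      \
})

int main(void)
{
   struct fake_device dev = { .devinfo = { .ver = 71 } };
   printf("%.1f\n", FAKE_V3DV_X(&dev, CLIPPER_XY_GRANULARITY)); /* 64.0 */
   return 0;
}
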
+
+
/* v3d_macros from common requires v3dX and V3DX definitions. Below we need to
* define v3dX for each version supported, because when we compile code that
@@ -2622,16 +2645,26 @@ u64_compare(const void *key1, const void *key2)
# define v3dX(x) v3d42_##x
# include "v3dvx_private.h"
# undef v3dX
+
+# define v3dX(x) v3d71_##x
+# include "v3dvx_private.h"
+# undef v3dX
#endif
+VkResult
+v3dv_update_image_layout(struct v3dv_device *device,
+ struct v3dv_image *image,
+ uint64_t modifier,
+ bool disjoint,
+ const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod_info);
+
#ifdef ANDROID
VkResult
-v3dv_gralloc_info(struct v3dv_device *device,
- const VkNativeBufferANDROID *gralloc_info,
- int *out_dmabuf,
- int *out_stride,
- int *out_size,
- uint64_t *out_modifier);
+v3dv_gralloc_to_drm_explicit_layout(struct u_gralloc *gralloc,
+ struct u_gralloc_buffer_handle *in_hnd,
+ VkImageDrmFormatModifierExplicitCreateInfoEXT *out,
+ VkSubresourceLayout *out_layouts,
+ int max_planes);
VkResult
v3dv_import_native_buffer_fd(VkDevice device_h,
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_query.c b/lib/mesa/src/broadcom/vulkan/v3dv_query.c
index 216dd1567..d6f93466d 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_query.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_query.c
@@ -23,7 +23,6 @@
#include "v3dv_private.h"
-#include "common/v3d_performance_counters.h"
#include "util/timespec.h"
#include "compiler/nir/nir_builder.h"
@@ -48,7 +47,7 @@ kperfmon_create(struct v3dv_device *device,
DRM_IOCTL_V3D_PERFMON_CREATE,
&req);
if (ret)
- fprintf(stderr, "Failed to create perfmon: %s\n", strerror(ret));
+ fprintf(stderr, "Failed to create perfmon for query %d: %s\n", query, strerror(ret));
pool->queries[query].perf.kperfmon_ids[i] = req.id;
}
@@ -303,7 +302,6 @@ v3dv_CreateQueryPool(VkDevice _device,
QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR);
assert(pq_info);
- assert(pq_info->counterIndexCount <= V3D_PERFCNT_NUM);
pool->perfmon.ncounters = pq_info->counterIndexCount;
for (uint32_t i = 0; i < pq_info->counterIndexCount; i++)
@@ -592,7 +590,7 @@ write_performance_query_result(struct v3dv_device *device,
assert(pool && pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);
struct v3dv_query *q = &pool->queries[query];
- uint64_t counter_values[V3D_PERFCNT_NUM];
+ uint64_t counter_values[V3D_MAX_PERFCNT];
for (uint32_t i = 0; i < pool->perfmon.nperfmons; i++) {
struct drm_v3d_perfmon_get_values req = {
@@ -1284,40 +1282,11 @@ v3dv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
VkPerformanceCounterKHR *pCounters,
VkPerformanceCounterDescriptionKHR *pCounterDescriptions)
{
- uint32_t desc_count = *pCounterCount;
+ V3DV_FROM_HANDLE(v3dv_physical_device, pDevice, physicalDevice);
- VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR,
- out, pCounters, pCounterCount);
- VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR,
- out_desc, pCounterDescriptions, &desc_count);
-
- for (int i = 0; i < ARRAY_SIZE(v3d_performance_counters); i++) {
- vk_outarray_append_typed(VkPerformanceCounterKHR, &out, counter) {
- counter->unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR;
- counter->scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR;
- counter->storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR;
-
- unsigned char sha1_result[20];
- _mesa_sha1_compute(v3d_performance_counters[i][V3D_PERFCNT_NAME],
- strlen(v3d_performance_counters[i][V3D_PERFCNT_NAME]),
- sha1_result);
-
- memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
- }
-
- vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR,
- &out_desc, desc) {
- desc->flags = 0;
- snprintf(desc->name, sizeof(desc->name), "%s",
- v3d_performance_counters[i][V3D_PERFCNT_NAME]);
- snprintf(desc->category, sizeof(desc->category), "%s",
- v3d_performance_counters[i][V3D_PERFCNT_CATEGORY]);
- snprintf(desc->description, sizeof(desc->description), "%s",
- v3d_performance_counters[i][V3D_PERFCNT_DESCRIPTION]);
- }
- }
-
- return vk_outarray_status(&out);
+ return v3dv_X(pDevice, enumerate_performance_query_counters)(pCounterCount,
+ pCounters,
+ pCounterDescriptions);
}
VKAPI_ATTR void VKAPI_CALL
@@ -1345,23 +1314,23 @@ v3dv_ReleaseProfilingLockKHR(VkDevice device)
static inline void
nir_set_query_availability(nir_builder *b,
- nir_ssa_def *buf,
- nir_ssa_def *offset,
- nir_ssa_def *query_idx,
- nir_ssa_def *avail)
+ nir_def *buf,
+ nir_def *offset,
+ nir_def *query_idx,
+ nir_def *avail)
{
offset = nir_iadd(b, offset, query_idx); /* we use 1B per query */
nir_store_ssbo(b, avail, buf, offset, .write_mask = 0x1, .align_mul = 1);
}
-static inline nir_ssa_def *
+static inline nir_def *
nir_get_query_availability(nir_builder *b,
- nir_ssa_def *buf,
- nir_ssa_def *offset,
- nir_ssa_def *query_idx)
+ nir_def *buf,
+ nir_def *offset,
+ nir_def *query_idx)
{
offset = nir_iadd(b, offset, query_idx); /* we use 1B per query */
- nir_ssa_def *avail = nir_load_ssbo(b, 1, 8, buf, offset, .align_mul = 1);
+ nir_def *avail = nir_load_ssbo(b, 1, 8, buf, offset, .align_mul = 1);
return nir_i2i32(b, avail);
}
@@ -1372,12 +1341,7 @@ get_set_query_availability_cs()
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
"set query availability cs");
- /* We rely on supergroup packing to maximize SIMD lane occupancy */
- b.shader->info.workgroup_size[0] = 1;
- b.shader->info.workgroup_size[1] = 1;
- b.shader->info.workgroup_size[2] = 1;
-
- nir_ssa_def *buf =
+ nir_def *buf =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 0,
.binding = 0,
@@ -1387,15 +1351,15 @@ get_set_query_availability_cs()
* ever change any of these parameters we need to update how we compute the
* query index here.
*/
- nir_ssa_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b, 32), 0);
+ nir_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b), 0);
- nir_ssa_def *offset =
+ nir_def *offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
- nir_ssa_def *query_idx =
+ nir_def *query_idx =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 4, .range = 4);
- nir_ssa_def *avail =
+ nir_def *avail =
nir_load_push_constant(&b, 1, 8, nir_imm_int(&b, 0), .base = 8, .range = 1);
query_idx = nir_iadd(&b, query_idx, wg_id);
@@ -1404,33 +1368,33 @@ get_set_query_availability_cs()
return b.shader;
}
-static inline nir_ssa_def *
-nir_get_occlusion_counter_offset(nir_builder *b, nir_ssa_def *query_idx)
+static inline nir_def *
+nir_get_occlusion_counter_offset(nir_builder *b, nir_def *query_idx)
{
- nir_ssa_def *query_group = nir_udiv_imm(b, query_idx, 16);
- nir_ssa_def *query_group_offset = nir_umod_imm(b, query_idx, 16);
- nir_ssa_def *offset =
- nir_iadd(b, nir_imul(b, query_group, nir_imm_int(b, 1024)),
- nir_imul(b, query_group_offset, nir_imm_int(b, 4)));
+ nir_def *query_group = nir_udiv_imm(b, query_idx, 16);
+ nir_def *query_group_offset = nir_umod_imm(b, query_idx, 16);
+ nir_def *offset =
+ nir_iadd(b, nir_imul_imm(b, query_group, 1024),
+ nir_imul_imm(b, query_group_offset, 4));
return offset;
}
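
The same layout math as a standalone sketch: counters live 16 to a group, groups are 1024 bytes apart, and each counter occupies 4 bytes:

#include <stdio.h>

static unsigned occlusion_counter_offset(unsigned query_idx)
{
   unsigned group = query_idx / 16; /* which 1024-byte group */
   unsigned slot  = query_idx % 16; /* 4-byte slot inside it */
   return group * 1024 + slot * 4;
}

int main(void)
{
   printf("%u %u\n",
          occlusion_counter_offset(0),   /* 0 */
          occlusion_counter_offset(17)); /* 1024 + 4 = 1028 */
   return 0;
}
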
static inline void
nir_reset_occlusion_counter(nir_builder *b,
- nir_ssa_def *buf,
- nir_ssa_def *query_idx)
+ nir_def *buf,
+ nir_def *query_idx)
{
- nir_ssa_def *offset = nir_get_occlusion_counter_offset(b, query_idx);
- nir_ssa_def *zero = nir_imm_int(b, 0);
+ nir_def *offset = nir_get_occlusion_counter_offset(b, query_idx);
+ nir_def *zero = nir_imm_int(b, 0);
nir_store_ssbo(b, zero, buf, offset, .write_mask = 0x1, .align_mul = 4);
}
-static inline nir_ssa_def *
+static inline nir_def *
nir_read_occlusion_counter(nir_builder *b,
- nir_ssa_def *buf,
- nir_ssa_def *query_idx)
+ nir_def *buf,
+ nir_def *query_idx)
{
- nir_ssa_def *offset = nir_get_occlusion_counter_offset(b, query_idx);
+ nir_def *offset = nir_get_occlusion_counter_offset(b, query_idx);
return nir_load_ssbo(b, 1, 32, buf, offset, .access = 0, .align_mul = 4);
}
@@ -1441,12 +1405,7 @@ get_reset_occlusion_query_cs()
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
"reset occlusion query cs");
- /* We rely on supergroup packing to maximize SIMD lane occupancy */
- b.shader->info.workgroup_size[0] = 1;
- b.shader->info.workgroup_size[1] = 1;
- b.shader->info.workgroup_size[2] = 1;
-
- nir_ssa_def *buf =
+ nir_def *buf =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 0,
.binding = 0,
@@ -1456,15 +1415,15 @@ get_reset_occlusion_query_cs()
* ever change any of these parameters we need to update how we compute the
* query index here.
*/
- nir_ssa_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b, 32), 0);
+ nir_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b), 0);
- nir_ssa_def *avail_offset =
+ nir_def *avail_offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
- nir_ssa_def *base_query_idx =
+ nir_def *base_query_idx =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 4, .range = 4);
- nir_ssa_def *query_idx = nir_iadd(&b, base_query_idx, wg_id);
+ nir_def *query_idx = nir_iadd(&b, base_query_idx, wg_id);
nir_set_query_availability(&b, buf, avail_offset, query_idx,
nir_imm_intN_t(&b, 0, 8));
@@ -1475,21 +1434,21 @@ get_reset_occlusion_query_cs()
static void
write_query_buffer(nir_builder *b,
- nir_ssa_def *buf,
- nir_ssa_def **offset,
- nir_ssa_def *value,
+ nir_def *buf,
+ nir_def **offset,
+ nir_def *value,
bool flag_64bit)
{
if (flag_64bit) {
/* Create a 64-bit value using a vec2 with the .Y component set to 0
* so we can write a 64-bit value in a single store.
*/
- nir_ssa_def *value64 = nir_vec2(b, value, nir_imm_int(b, 0));
+ nir_def *value64 = nir_vec2(b, value, nir_imm_int(b, 0));
nir_store_ssbo(b, value64, buf, *offset, .write_mask = 0x3, .align_mul = 8);
- *offset = nir_iadd(b, *offset, nir_imm_int(b, 8));
+ *offset = nir_iadd_imm(b, *offset, 8);
} else {
nir_store_ssbo(b, value, buf, *offset, .write_mask = 0x1, .align_mul = 4);
- *offset = nir_iadd(b, *offset, nir_imm_int(b, 4));
+ *offset = nir_iadd_imm(b, *offset, 4);
}
}
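
On a little-endian target the vec2 store amounts to zero-extending the 32-bit counter into a single 64-bit write; a sketch of the equivalent memory layout in plain C, not the NIR itself:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
   uint32_t value = 0xdeadbeef;
   uint32_t vec2[2] = { value, 0 }; /* .x = counter, .y = 0 */
   uint64_t out;
   memcpy(&out, vec2, sizeof(out)); /* one 64-bit store */
   printf("0x%llx\n", (unsigned long long)out); /* 0xdeadbeef */
   return 0;
}
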
@@ -1504,60 +1463,55 @@ get_copy_query_results_cs(VkQueryResultFlags flags)
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
"copy query results cs");
- /* We rely on supergroup packing to maximize SIMD lane occupancy */
- b.shader->info.workgroup_size[0] = 1;
- b.shader->info.workgroup_size[1] = 1;
- b.shader->info.workgroup_size[2] = 1;
-
- nir_ssa_def *buf =
+ nir_def *buf =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 0,
.binding = 0,
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
- nir_ssa_def *buf_out =
+ nir_def *buf_out =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 1,
.binding = 0,
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
/* Read push constants */
- nir_ssa_def *avail_offset =
+ nir_def *avail_offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
- nir_ssa_def *base_query_idx =
+ nir_def *base_query_idx =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 4, .range = 4);
- nir_ssa_def *base_offset_out =
+ nir_def *base_offset_out =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 8, .range = 4);
- nir_ssa_def *stride =
+ nir_def *stride =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 12, .range = 4);
/* This assumes a local size of 1 and a horizontal-only dispatch. If we
* ever change any of these parameters we need to update how we compute the
* query index here.
*/
- nir_ssa_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b, 32), 0);
- nir_ssa_def *query_idx = nir_iadd(&b, base_query_idx, wg_id);
+ nir_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b), 0);
+ nir_def *query_idx = nir_iadd(&b, base_query_idx, wg_id);
/* Read query availability if needed */
- nir_ssa_def *avail = NULL;
+ nir_def *avail = NULL;
if (flag_avail || !flag_partial)
avail = nir_get_query_availability(&b, buf, avail_offset, query_idx);
/* Write occusion query result... */
- nir_ssa_def *offset =
+ nir_def *offset =
nir_iadd(&b, base_offset_out, nir_imul(&b, wg_id, stride));
/* ...if partial is requested, we always write */
if(flag_partial) {
- nir_ssa_def *query_res = nir_read_occlusion_counter(&b, buf, query_idx);
+ nir_def *query_res = nir_read_occlusion_counter(&b, buf, query_idx);
write_query_buffer(&b, buf_out, &offset, query_res, flag_64bit);
} else {
/*...otherwise, we only write if the query is available */
nir_if *if_stmt = nir_push_if(&b, nir_ine_imm(&b, avail, 0));
- nir_ssa_def *query_res = nir_read_occlusion_counter(&b, buf, query_idx);
+ nir_def *query_res = nir_read_occlusion_counter(&b, buf, query_idx);
write_query_buffer(&b, buf_out, &offset, query_res, flag_64bit);
nir_pop_if(&b, if_stmt);
}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_queue.c b/lib/mesa/src/broadcom/vulkan/v3dv_queue.c
index 9e1bc702f..a0942cf1c 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_queue.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_queue.c
@@ -135,7 +135,7 @@ handle_reset_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
* we handle those in the CPU.
*/
if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION)
- v3dv_bo_wait(job->device, info->pool->occlusion.bo, PIPE_TIMEOUT_INFINITE);
+ v3dv_bo_wait(job->device, info->pool->occlusion.bo, OS_TIMEOUT_INFINITE);
if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
struct vk_sync_wait waits[info->count];
@@ -296,60 +296,6 @@ handle_copy_query_results_cpu_job(struct v3dv_job *job)
}
static VkResult
-handle_copy_buffer_to_image_cpu_job(struct v3dv_queue *queue,
- struct v3dv_job *job,
- struct v3dv_submit_sync_info *sync_info)
-{
- assert(job->type == V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE);
- struct v3dv_copy_buffer_to_image_cpu_job_info *info =
- &job->cpu.copy_buffer_to_image;
-
- /* Wait for all GPU work to finish first, since we may be accessing
- * the BOs involved in the operation.
- */
- VkResult result = queue_wait_idle(queue, sync_info);
- if (result != VK_SUCCESS)
- return result;
-
- /* Map BOs */
- struct v3dv_bo *dst_bo = info->image->planes[info->plane].mem->bo;
- assert(!dst_bo->map || dst_bo->map_size == dst_bo->size);
- if (!dst_bo->map && !v3dv_bo_map(job->device, dst_bo, dst_bo->size))
- return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
- void *dst_ptr = dst_bo->map;
-
- struct v3dv_bo *src_bo = info->buffer->mem->bo;
- assert(!src_bo->map || src_bo->map_size == src_bo->size);
- if (!src_bo->map && !v3dv_bo_map(job->device, src_bo, src_bo->size))
- return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
- void *src_ptr = src_bo->map;
-
- const struct v3d_resource_slice *slice =
- &info->image->planes[info->plane].slices[info->mip_level];
-
- const struct pipe_box box = {
- info->image_offset.x, info->image_offset.y, info->base_layer,
- info->image_extent.width, info->image_extent.height, info->layer_count,
- };
-
- /* Copy each layer */
- for (uint32_t i = 0; i < info->layer_count; i++) {
- const uint32_t dst_offset =
- v3dv_layer_offset(info->image, info->mip_level,
- info->base_layer + i, info->plane);
- const uint32_t src_offset =
- info->buffer->mem_offset + info->buffer_offset +
- info->buffer_layer_stride * i;
- v3d_store_tiled_image(
- dst_ptr + dst_offset, slice->stride,
- src_ptr + src_offset, info->buffer_stride,
- slice->tiling, info->image->planes[info->plane].cpp, slice->padded_height, &box);
- }
-
- return VK_SUCCESS;
-}
-
-static VkResult
handle_timestamp_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
struct v3dv_submit_sync_info *sync_info)
{
@@ -392,7 +338,7 @@ handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
/* Make sure the GPU is no longer using the indirect buffer*/
assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
- v3dv_bo_wait(queue->device, info->buffer->mem->bo, PIPE_TIMEOUT_INFINITE);
+ v3dv_bo_wait(queue->device, info->buffer->mem->bo, OS_TIMEOUT_INFINITE);
/* Map the indirect buffer and read the dispatch parameters */
assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
@@ -408,7 +354,7 @@ handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
if (memcmp(group_counts, info->csd_job->csd.wg_count,
sizeof(info->csd_job->csd.wg_count)) != 0) {
- v3dv_cmd_buffer_rewrite_indirect_csd_job(info, group_counts);
+ v3dv_cmd_buffer_rewrite_indirect_csd_job(queue->device, info, group_counts);
}
return VK_SUCCESS;
@@ -757,7 +703,7 @@ handle_cl_job(struct v3dv_queue *queue,
if (job->tmu_dirty_rcl)
submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;
- /* If the job uses VK_KHR_buffer_device_addess we need to ensure all
+ /* If the job uses VK_KHR_buffer_device_address we need to ensure all
* buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR
* are included.
*/
@@ -923,7 +869,7 @@ handle_csd_job(struct v3dv_queue *queue,
struct drm_v3d_submit_csd *submit = &job->csd.submit;
- /* If the job uses VK_KHR_buffer_device_addess we need to ensure all
+ /* If the job uses VK_KHR_buffer_device_address we need to ensure all
* buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR
* are included.
*/
@@ -1014,8 +960,6 @@ queue_handle_job(struct v3dv_queue *queue,
return handle_end_query_cpu_job(job, counter_pass_idx);
case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
return handle_copy_query_results_cpu_job(job);
- case V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE:
- return handle_copy_buffer_to_image_cpu_job(queue, job, sync_info);
case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
return handle_csd_indirect_cpu_job(queue, job, sync_info);
case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY:
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_uniforms.c b/lib/mesa/src/broadcom/vulkan/v3dv_uniforms.c
index f3a98ab7e..098bfb648 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_uniforms.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_uniforms.c
@@ -87,7 +87,7 @@ push_constants_bo_free(VkDevice _device,
* This method checks if the ubo used for push constants is needed to be
* updated or not.
*
- * push contants ubo is only used for push constants accessed by a non-const
+ * push constants ubo is only used for push constants accessed by a non-const
* index.
*/
static void
@@ -288,9 +288,10 @@ write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
offset + dynamic_offset);
} else {
if (content == QUNIFORM_UBO_ADDR) {
- /* We reserve index 0 for push constants and artificially increase our
- * indices by one for that reason, fix that now before accessing the
- * descriptor map.
+ /* We reserve UBO index 0 for push constants in Vulkan (and for the
+ * constant buffer in GL) so the compiler always adds one to all UBO
+ * indices; fix it up before we access the descriptor map, since
+ * indices start from 0 there.
*/
assert(index > 0);
index--;
@@ -497,7 +498,6 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);
struct v3dv_cl_out *uniforms = cl_start(&job->indirect);
-
for (int i = 0; i < uinfo->count; i++) {
uint32_t data = uinfo->data[i];
@@ -519,13 +519,17 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
cmd_buffer, pipeline, variant->stage);
break;
- case QUNIFORM_VIEWPORT_X_SCALE:
- cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * 256.0f);
+ case QUNIFORM_VIEWPORT_X_SCALE: {
+ float clipper_xy_granularity = V3DV_X(cmd_buffer->device, CLIPPER_XY_GRANULARITY);
+ cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * clipper_xy_granularity);
break;
+ }
- case QUNIFORM_VIEWPORT_Y_SCALE:
- cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * 256.0f);
+ case QUNIFORM_VIEWPORT_Y_SCALE: {
+ float clipper_xy_granularity = V3DV_X(cmd_buffer->device, CLIPPER_XY_GRANULARITY);
+ cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * clipper_xy_granularity);
break;
+ }
case QUNIFORM_VIEWPORT_Z_OFFSET: {
float translate_z;
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_wsi.c b/lib/mesa/src/broadcom/vulkan/v3dv_wsi.c
index 5efb1ea95..404a64d0e 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_wsi.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_wsi.c
@@ -24,8 +24,6 @@
*/
#include "v3dv_private.h"
-#include "drm-uapi/drm_fourcc.h"
-#include "wsi_common_entrypoints.h"
#include "vk_util.h"
#include "wsi_common.h"
#include "wsi_common_drm.h"
@@ -41,19 +39,7 @@ static bool
v3dv_wsi_can_present_on_device(VkPhysicalDevice _pdevice, int fd)
{
V3DV_FROM_HANDLE(v3dv_physical_device, pdevice, _pdevice);
-
- /* There are some instances with direct display extensions where this may be
- * called before we have ever tried to create a swapchain, and therefore,
- * before we have ever tried to acquire the display device, in which case we
- * have to do it now.
- */
- if (unlikely(pdevice->display_fd < 0 && pdevice->master_fd >= 0)) {
- VkResult result =
- v3dv_physical_device_acquire_display(pdevice, NULL);
- if (result != VK_SUCCESS)
- return false;
- }
-
+ assert(pdevice->display_fd != -1);
return wsi_common_drm_devices_equal(fd, pdevice->display_fd);
}
@@ -66,7 +52,7 @@ v3dv_wsi_init(struct v3dv_physical_device *physical_device)
v3dv_physical_device_to_handle(physical_device),
v3dv_wsi_proc_addr,
&physical_device->vk.instance->alloc,
- physical_device->master_fd, NULL,
+ physical_device->display_fd, NULL,
&(struct wsi_device_options){.sw_device = false});
if (result != VK_SUCCESS)
@@ -89,67 +75,6 @@ v3dv_wsi_finish(struct v3dv_physical_device *physical_device)
&physical_device->vk.instance->alloc);
}
-static void
-constraint_surface_capabilities(VkSurfaceCapabilitiesKHR *caps)
-{
- /* Our display pipeline requires that images are linear, so we cannot
- * ensure that our swapchain images can be sampled. If we are running under
- * a compositor in windowed mode, the DRM modifier negotiation should
- * probably end up selecting an UIF layout for the swapchain images but it
- * may still choose linear and send images directly for scanout if the
- * surface is in fullscreen mode for example. If we are not running under
- * a compositor, then we would always need them to be linear anyway.
- */
- caps->supportedUsageFlags &= ~VK_IMAGE_USAGE_SAMPLED_BIT;
-}
-
-VKAPI_ATTR VkResult VKAPI_CALL
-v3dv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
- VkPhysicalDevice physicalDevice,
- VkSurfaceKHR surface,
- VkSurfaceCapabilitiesKHR* pSurfaceCapabilities)
-{
- VkResult result;
- result = wsi_GetPhysicalDeviceSurfaceCapabilitiesKHR(physicalDevice,
- surface,
- pSurfaceCapabilities);
- constraint_surface_capabilities(pSurfaceCapabilities);
- return result;
-}
-
-VKAPI_ATTR VkResult VKAPI_CALL
-v3dv_GetPhysicalDeviceSurfaceCapabilities2KHR(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo,
- VkSurfaceCapabilities2KHR* pSurfaceCapabilities)
-{
- VkResult result;
- result = wsi_GetPhysicalDeviceSurfaceCapabilities2KHR(physicalDevice,
- pSurfaceInfo,
- pSurfaceCapabilities);
- constraint_surface_capabilities(&pSurfaceCapabilities->surfaceCapabilities);
- return result;
-}
-
-VKAPI_ATTR VkResult VKAPI_CALL
-v3dv_CreateSwapchainKHR(
- VkDevice _device,
- const VkSwapchainCreateInfoKHR* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSwapchainKHR* pSwapchain)
-{
- V3DV_FROM_HANDLE(v3dv_device, device, _device);
- struct v3dv_physical_device *pdevice = device->pdevice;
-
- ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, pCreateInfo->surface);
- VkResult result =
- v3dv_physical_device_acquire_display(pdevice, surface);
- if (result != VK_SUCCESS)
- return result;
-
- return wsi_CreateSwapchainKHR(_device, pCreateInfo, pAllocator, pSwapchain);
-}
-
struct v3dv_image *
v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain, uint32_t index)
{
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c
index 0c23a33b5..011f5c8e1 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c
@@ -56,10 +56,15 @@ v3dX(job_emit_enable_double_buffer)(struct v3dv_job *job)
};
config.width_in_pixels = tiling->width;
config.height_in_pixels = tiling->height;
+#if V3D_VERSION == 42
config.number_of_render_targets = MAX2(tiling->render_target_count, 1);
config.multisample_mode_4x = tiling->msaa;
config.double_buffer_in_non_ms_mode = tiling->double_buffer;
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
+#endif
+#if V3D_VERSION >= 71
+ unreachable("HW generation 71 not supported yet.");
+#endif
uint8_t *rewrite_addr = (uint8_t *)job->bcl_tile_binning_mode_ptr;
cl_packet_pack(TILE_BINNING_MODE_CFG)(NULL, rewrite_addr, &config);
@@ -82,10 +87,22 @@ v3dX(job_emit_binning_prolog)(struct v3dv_job *job,
cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
config.width_in_pixels = tiling->width;
config.height_in_pixels = tiling->height;
+#if V3D_VERSION == 42
config.number_of_render_targets = MAX2(tiling->render_target_count, 1);
config.multisample_mode_4x = tiling->msaa;
config.double_buffer_in_non_ms_mode = tiling->double_buffer;
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
+#endif
+#if V3D_VERSION >= 71
+ config.log2_tile_width = log2_tile_size(tiling->tile_width);
+ config.log2_tile_height = log2_tile_size(tiling->tile_height);
+ /* FIXME: ideally we would like the next assert to be on the packet
+ * header (as it is generic, so it also applies to GL). We would need
+ * to expand gen_pack_header for that.
+ */
+ assert(config.log2_tile_width == config.log2_tile_height ||
+ config.log2_tile_width == config.log2_tile_height + 1);
+#endif
}
/* There's definitely nothing in the VCD cache we want. */
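
A sketch of the invariant asserted above, assuming log2_tile_size is a power-of-two logarithm: supported tile geometries are square or 2:1 wide, e.g. 64x64 -> (6, 6) and 128x64 -> (7, 6):

#include <assert.h>
#include <stdio.h>

/* Assumed helper: tile dimensions are powers of two. */
static unsigned log2_tile_size(unsigned size)
{
   unsigned l = 0;
   while ((1u << l) < size)
      l++;
   return l;
}

int main(void)
{
   unsigned lw = log2_tile_size(128), lh = log2_tile_size(64);
   assert(lw == lh || lw == lh + 1); /* square or twice as wide */
   printf("(%u, %u)\n", lw, lh);     /* (7, 6) */
   return 0;
}
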
@@ -345,6 +362,11 @@ cmd_buffer_render_pass_emit_store(struct v3dv_cmd_buffer *cmd_buffer,
iview->vk.base_array_layer + layer,
image_plane);
+ /* The Clear Buffer bit is not supported for Z/Stencil stores in 7.x and it
+ * is broken in earlier V3D versions.
+ */
+ assert((buffer != Z && buffer != STENCIL && buffer != ZSTENCIL) || !clear);
+
cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
store.buffer_to_store = buffer;
store.address = v3dv_cl_address(image->planes[image_plane].mem->bo, layer_offset);
@@ -467,6 +489,30 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
const VkImageAspectFlags aspects =
vk_format_aspects(ds_attachment->desc.format);
+#if V3D_VERSION <= 42
+ /* GFXH-1689: The per-buffer store command's clear buffer bit is broken
+ * for depth/stencil.
+ *
+ * There used to be some confusion regarding the Clear Tile Buffers
+ * Z/S bit also being broken, but we confirmed with Broadcom that this
+ * is not the case, it was just that some other hardware bugs (that we
+ * need to work around, such as GFXH-1461) could cause this bit to behave
+ * incorrectly.
+ *
+ * There used to be another issue where the RTs bit in the Clear Tile
+ * Buffers packet also cleared Z/S, but Broadcom confirmed this is
+ * fixed since V3D 4.1.
+ *
+ * So if we have to emit a clear of depth or stencil we don't use
+ * the per-buffer store clear bit, even if we need to store the buffers,
+ * instead we always have to use the Clear Tile Buffers Z/S bit.
+ * If we have configured the job to do early Z/S clearing, then we
+ * don't want to emit any Clear Tile Buffers command at all here.
+ *
+ * Note that GFXH-1689 is not reproduced in the simulator, where
+ * using the clear buffer bit in depth/stencil stores works fine.
+ */
+
/* Only clear once on the first subpass that uses the attachment */
uint32_t ds_first_subpass = !state->pass->multiview_enabled ?
ds_attachment->first_subpass :
@@ -486,6 +532,17 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
ds_attachment->desc.stencilLoadOp,
subpass->do_stencil_clear_with_draw);
+ use_global_zs_clear = !state->job->early_zs_clear &&
+ (needs_depth_clear || needs_stencil_clear);
+#endif
+#if V3D_VERSION >= 71
+   /* The store command's clear buffer bit cannot be used for Z/S: since
+    * V3D 4.5.6, Z/S buffers are automatically cleared between tiles, so
+    * emitting clears here would be redundant.
+ */
+ use_global_zs_clear = false;
+#endif
+
/* Skip the last store if it is not required */
uint32_t ds_last_subpass = !pass->multiview_enabled ?
ds_attachment->last_subpass :
@@ -528,30 +585,6 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
needs_stencil_store = subpass->resolve_stencil;
}
- /* GFXH-1689: The per-buffer store command's clear buffer bit is broken
- * for depth/stencil.
- *
- * There used to be some confusion regarding the Clear Tile Buffers
- * Z/S bit also being broken, but we confirmed with Broadcom that this
- * is not the case, it was just that some other hardware bugs (that we
- * need to work around, such as GFXH-1461) could cause this bit to behave
- * incorrectly.
- *
- * There used to be another issue where the RTs bit in the Clear Tile
- * Buffers packet also cleared Z/S, but Broadcom confirmed this is
- * fixed since V3D 4.1.
- *
- * So if we have to emit a clear of depth or stencil we don't use
- * the per-buffer store clear bit, even if we need to store the buffers,
- * instead we always have to use the Clear Tile Buffers Z/S bit.
- * If we have configured the job to do early Z/S clearing, then we
- * don't want to emit any Clear Tile Buffers command at all here.
- *
- * Note that GFXH-1689 is not reproduced in the simulator, where
- * using the clear buffer bit in depth/stencil stores works fine.
- */
- use_global_zs_clear = !state->job->early_zs_clear &&
- (needs_depth_clear || needs_stencil_clear);
if (needs_depth_store || needs_stencil_store) {
const uint32_t zs_buffer =
v3dv_zs_buffer(needs_depth_store, needs_stencil_store);
@@ -649,10 +682,15 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
* bit and instead we have to emit a single clear of all tile buffers.
*/
if (use_global_zs_clear || use_global_rt_clear) {
+#if V3D_VERSION == 42
cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
clear.clear_z_stencil_buffer = use_global_zs_clear;
clear.clear_all_render_targets = use_global_rt_clear;
}
+#endif
+#if V3D_VERSION >= 71
+ cl_emit(cl, CLEAR_RENDER_TARGETS, clear);
+#endif
}
}
@@ -778,6 +816,103 @@ set_rcl_early_z_config(struct v3dv_job *job,
}
}
+/* Note that for v71 the render target cfg packets have just one field that
+ * combines the internal type and the clamp mode. For simplicity we keep just
+ * one helper.
+ *
+ * Note: rt_type is in fact an "enum V3DX(Internal_Type)".
+ *
+ * FIXME: for v71 we are not returning all the possible combinations of
+ * render target internal type and clamp. For example, for int types we
+ * always use int clamping, and for 16f we use clamp none or pos (which
+ * seems to be the equivalent of no-clamp on 4.2), but not pq or hlg. In
+ * summary, right now we are just porting what we were doing on 4.2.
+ */
+uint32_t
+v3dX(clamp_for_format_and_type)(uint32_t rt_type,
+ VkFormat vk_format)
+{
+#if V3D_VERSION == 42
+ if (vk_format_is_int(vk_format))
+ return V3D_RENDER_TARGET_CLAMP_INT;
+ else if (vk_format_is_srgb(vk_format))
+ return V3D_RENDER_TARGET_CLAMP_NORM;
+ else
+ return V3D_RENDER_TARGET_CLAMP_NONE;
+#endif
+#if V3D_VERSION >= 71
+ switch (rt_type) {
+ case V3D_INTERNAL_TYPE_8I:
+ return V3D_RENDER_TARGET_TYPE_CLAMP_8I_CLAMPED;
+ case V3D_INTERNAL_TYPE_8UI:
+ return V3D_RENDER_TARGET_TYPE_CLAMP_8UI_CLAMPED;
+ case V3D_INTERNAL_TYPE_8:
+ return V3D_RENDER_TARGET_TYPE_CLAMP_8;
+ case V3D_INTERNAL_TYPE_16I:
+ return V3D_RENDER_TARGET_TYPE_CLAMP_16I_CLAMPED;
+ case V3D_INTERNAL_TYPE_16UI:
+ return V3D_RENDER_TARGET_TYPE_CLAMP_16UI_CLAMPED;
+ case V3D_INTERNAL_TYPE_16F:
+ return vk_format_is_srgb(vk_format) ?
+ V3D_RENDER_TARGET_TYPE_CLAMP_16F_CLAMP_NORM :
+ V3D_RENDER_TARGET_TYPE_CLAMP_16F;
+ case V3D_INTERNAL_TYPE_32I:
+ return V3D_RENDER_TARGET_TYPE_CLAMP_32I_CLAMPED;
+ case V3D_INTERNAL_TYPE_32UI:
+ return V3D_RENDER_TARGET_TYPE_CLAMP_32UI_CLAMPED;
+ case V3D_INTERNAL_TYPE_32F:
+ return V3D_RENDER_TARGET_TYPE_CLAMP_32F;
+ default:
+ unreachable("Unknown internal render target type");
+ }
+
+ return V3D_RENDER_TARGET_TYPE_CLAMP_INVALID;
+#endif
+}
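A quick usage sketch of the helper above (the results follow directly from its branches): on v42 the clamp depends only on the Vulkan format, so clamp_for_format_and_type(V3D_INTERNAL_TYPE_8, VK_FORMAT_R8G8B8A8_SRGB) returns V3D_RENDER_TARGET_CLAMP_NORM, while on v71 the same call returns the combined V3D_RENDER_TARGET_TYPE_CLAMP_8, and the format only influences the 16F case.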
+
+static void
+cmd_buffer_render_pass_setup_render_target(struct v3dv_cmd_buffer *cmd_buffer,
+ int rt,
+ uint32_t *rt_bpp,
+#if V3D_VERSION == 42
+ uint32_t *rt_type,
+ uint32_t *rt_clamp)
+#else
+ uint32_t *rt_type_clamp)
+#endif
+{
+ const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
+
+ assert(state->subpass_idx < state->pass->subpass_count);
+ const struct v3dv_subpass *subpass =
+ &state->pass->subpasses[state->subpass_idx];
+
+ if (rt >= subpass->color_count)
+ return;
+
+ struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt];
+ const uint32_t attachment_idx = attachment->attachment;
+ if (attachment_idx == VK_ATTACHMENT_UNUSED)
+ return;
+
+ assert(attachment_idx < state->framebuffer->attachment_count &&
+ attachment_idx < state->attachment_alloc_count);
+ struct v3dv_image_view *iview = state->attachments[attachment_idx].image_view;
+ assert(vk_format_is_color(iview->vk.format));
+
+ assert(iview->plane_count == 1);
+ *rt_bpp = iview->planes[0].internal_bpp;
+#if V3D_VERSION == 42
+ *rt_type = iview->planes[0].internal_type;
+ *rt_clamp = v3dX(clamp_for_format_and_type)(iview->planes[0].internal_type,
+ iview->vk.format);
+#endif
+#if V3D_VERSION >= 71
+ *rt_type_clamp = v3dX(clamp_for_format_and_type)(iview->planes[0].internal_type,
+ iview->vk.format);
+#endif
+}
+
void
v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
{
@@ -824,7 +959,19 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
config.number_of_render_targets = MAX2(subpass->color_count, 1);
config.multisample_mode_4x = tiling->msaa;
config.double_buffer_in_non_ms_mode = tiling->double_buffer;
+#if V3D_VERSION == 42
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
+#endif
+#if V3D_VERSION >= 71
+ config.log2_tile_width = log2_tile_size(tiling->tile_width);
+ config.log2_tile_height = log2_tile_size(tiling->tile_height);
+   /* FIXME: ideally we would like this assert on the packet header (as it
+    * is generic, so it also applies to GL). We would need to expand
+ * gen_pack_header for that.
+ */
+ assert(config.log2_tile_width == config.log2_tile_height ||
+ config.log2_tile_width == config.log2_tile_height + 1);
+#endif
if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
const struct v3dv_image_view *iview =
@@ -851,6 +998,10 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
* Early-Z/S clearing is independent of Early Z/S testing, so it is
* possible to enable one but not the other so long as their
* respective requirements are met.
+ *
+ * From V3D 4.5.6, Z/S buffers are always cleared automatically
+ * between tiles, but we still want to enable early ZS clears
+ * when Z/S are not loaded or stored.
*/
struct v3dv_render_pass_attachment *ds_attachment =
&pass->attachments[ds_attachment_idx];
@@ -858,21 +1009,33 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
const VkImageAspectFlags ds_aspects =
vk_format_aspects(ds_attachment->desc.format);
- bool needs_depth_clear =
- check_needs_clear(state,
- ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
- ds_attachment->first_subpass,
- ds_attachment->desc.loadOp,
- subpass->do_depth_clear_with_draw);
-
bool needs_depth_store =
v3dv_cmd_buffer_check_needs_store(state,
ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
ds_attachment->last_subpass,
ds_attachment->desc.storeOp) ||
subpass->resolve_depth;
+#if V3D_VERSION <= 42
+ bool needs_depth_clear =
+ check_needs_clear(state,
+ ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
+ ds_attachment->first_subpass,
+ ds_attachment->desc.loadOp,
+ subpass->do_depth_clear_with_draw);
do_early_zs_clear = needs_depth_clear && !needs_depth_store;
+#endif
+#if V3D_VERSION >= 71
+ bool needs_depth_load =
+ v3dv_cmd_buffer_check_needs_load(state,
+ ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
+ ds_attachment->first_subpass,
+ ds_attachment->desc.loadOp,
+ ds_attachment->last_subpass,
+ ds_attachment->desc.storeOp);
+ do_early_zs_clear = !needs_depth_load && !needs_depth_store;
+#endif
+
if (do_early_zs_clear &&
vk_format_has_stencil(ds_attachment->desc.format)) {
bool needs_stencil_load =
@@ -905,10 +1068,20 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
*/
job->early_zs_clear = do_early_zs_clear;
+#if V3D_VERSION >= 71
+ uint32_t base_addr = 0;
+#endif
for (uint32_t i = 0; i < subpass->color_count; i++) {
uint32_t attachment_idx = subpass->color_attachments[i].attachment;
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
+ if (attachment_idx == VK_ATTACHMENT_UNUSED) {
+#if V3D_VERSION >= 71
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
+ rt.render_target_number = i;
+ rt.stride = 1; /* Unused */
+ }
+#endif
continue;
+ }
struct v3dv_image_view *iview =
state->attachments[attachment_idx].image_view;
@@ -920,10 +1093,10 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
const struct v3d_resource_slice *slice =
&image->planes[plane].slices[iview->vk.base_mip_level];
- const uint32_t *clear_color =
+ UNUSED const uint32_t *clear_color =
&state->attachments[attachment_idx].clear_value.color[0];
- uint32_t clear_pad = 0;
+ UNUSED uint32_t clear_pad = 0;
if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
slice->tiling == V3D_TILING_UIF_XOR) {
int uif_block_height = v3d_utile_height(image->planes[plane].cpp) * 2;
@@ -937,6 +1110,7 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
}
}
+#if V3D_VERSION == 42
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
clear.clear_color_low_32_bits = clear_color[0];
clear.clear_color_next_24_bits = clear_color[1] & 0xffffff;
@@ -960,22 +1134,74 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
clear.render_target_number = i;
};
}
+#endif
+
+#if V3D_VERSION >= 71
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
+ rt.clear_color_low_bits = clear_color[0];
+ cmd_buffer_render_pass_setup_render_target(cmd_buffer, i, &rt.internal_bpp,
+ &rt.internal_type_and_clamping);
+ rt.stride =
+ v3d_compute_rt_row_row_stride_128_bits(tiling->tile_width,
+ v3d_internal_bpp_words(rt.internal_bpp));
+ rt.base_address = base_addr;
+ rt.render_target_number = i;
+
+         /* base_addr is in multiples of 512 bits. We divide by 8 because the
+          * stride is in 128-bit units but packs 2 rows worth of data: dividing
+          * by 2 brings it down to 1 row, and dividing by 4 more converts it
+          * to 512-bit units.
+ */
+ base_addr += (tiling->tile_height * rt.stride) / 8;
+ }
+
+ if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_64) {
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART2, rt) {
+ rt.clear_color_mid_bits = /* 40 bits (32 + 8) */
+ ((uint64_t) clear_color[1]) |
+ (((uint64_t) (clear_color[2] & 0xff)) << 32);
+ rt.render_target_number = i;
+ }
+ }
+
+ if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_128) {
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART3, rt) {
+ rt.clear_color_top_bits = /* 56 bits (24 + 32) */
+ (((uint64_t) (clear_color[2] & 0xffffff00)) >> 8) |
+ (((uint64_t) (clear_color[3])) << 24);
+ rt.render_target_number = i;
+ }
+ }
+#endif
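Taken together, the three RENDER_TARGET_PART packets above carry the full 128-bit clear color. A sketch of the bit distribution, read off the shifts in the code:

   /* clear_color[] is the raw 4 x 32-bit clear value */
   PART1.clear_color_low_bits;   /* bits  0..31,  32 bits: color[0]                  */
   PART2.clear_color_mid_bits;   /* bits 32..71,  40 bits: color[1], color[2] & 0xff */
   PART3.clear_color_top_bits;   /* bits 72..127, 56 bits: color[2] >> 8, color[3]   */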
+ }
+
+#if V3D_VERSION >= 71
+   /* If we don't have any color RTs, we still need to emit one and flag
+    * it as unused by setting stride = 1.
+ */
+ if (subpass->color_count == 0) {
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
+ rt.stride = 1;
+ }
}
+#endif
+#if V3D_VERSION == 42
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
- v3dX(cmd_buffer_render_pass_setup_render_target)
+ cmd_buffer_render_pass_setup_render_target
(cmd_buffer, 0, &rt.render_target_0_internal_bpp,
&rt.render_target_0_internal_type, &rt.render_target_0_clamp);
- v3dX(cmd_buffer_render_pass_setup_render_target)
+ cmd_buffer_render_pass_setup_render_target
(cmd_buffer, 1, &rt.render_target_1_internal_bpp,
&rt.render_target_1_internal_type, &rt.render_target_1_clamp);
- v3dX(cmd_buffer_render_pass_setup_render_target)
+ cmd_buffer_render_pass_setup_render_target
(cmd_buffer, 2, &rt.render_target_2_internal_bpp,
&rt.render_target_2_internal_type, &rt.render_target_2_clamp);
- v3dX(cmd_buffer_render_pass_setup_render_target)
+ cmd_buffer_render_pass_setup_render_target
(cmd_buffer, 3, &rt.render_target_3_internal_bpp,
&rt.render_target_3_internal_type, &rt.render_target_3_clamp);
}
+#endif
/* Ends rendering mode config. */
if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
@@ -1036,10 +1262,15 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
}
if (cmd_buffer->state.tile_aligned_render_area &&
(i == 0 || v3dv_do_double_initial_tile_clear(tiling))) {
+#if V3D_VERSION == 42
cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
clear.clear_z_stencil_buffer = !job->early_zs_clear;
clear.clear_all_render_targets = true;
}
+#endif
+#if V3D_VERSION >= 71
+ cl_emit(rcl, CLEAR_RENDER_TARGETS, clear_rt);
+#endif
}
cl_emit(rcl, END_OF_TILE_MARKER, end);
}
@@ -1055,6 +1286,43 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
}
void
+v3dX(viewport_compute_xform)(const VkViewport *viewport,
+ float scale[3],
+ float translate[3])
+{
+ float x = viewport->x;
+ float y = viewport->y;
+ float half_width = 0.5f * viewport->width;
+ float half_height = 0.5f * viewport->height;
+ double n = viewport->minDepth;
+ double f = viewport->maxDepth;
+
+ scale[0] = half_width;
+ translate[0] = half_width + x;
+ scale[1] = half_height;
+ translate[1] = half_height + y;
+
+ scale[2] = (f - n);
+ translate[2] = n;
+
+   /* It seems that if the scale is small enough the hardware won't clip
+    * correctly, so we work around this by choosing the smallest scale that
+    * seems to work.
+ *
+ * This case is exercised by CTS:
+ * dEQP-VK.draw.renderpass.inverted_depth_ranges.nodepthclamp_deltazero
+ *
+ * V3D 7.x fixes this by using the new
+ * CLIPPER_Z_SCALE_AND_OFFSET_NO_GUARDBAND.
+ */
+#if V3D_VERSION <= 42
+ const float min_abs_scale = 0.0005f;
+ if (fabs(scale[2]) < min_abs_scale)
+ scale[2] = scale[2] < 0 ? -min_abs_scale : min_abs_scale;
+#endif
+}
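A worked example of the transform above for a full-window 800x600 viewport with depth range [0.0, 1.0] (x = 0, y = 0):

   scale     = { 400.0f, 300.0f, 1.0f };   /* half width, half height, f - n */
   translate = { 400.0f, 300.0f, 0.0f };   /* viewport centre and near plane */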
+
+void
v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer)
{
struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
@@ -1078,19 +1346,45 @@ v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer)
v3dv_cl_ensure_space_with_branch(&job->bcl, required_cl_size);
v3dv_return_if_oom(cmd_buffer, NULL);
+#if V3D_VERSION == 42
cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
clip.viewport_half_width_in_1_256th_of_pixel = vpscale[0] * 256.0f;
clip.viewport_half_height_in_1_256th_of_pixel = vpscale[1] * 256.0f;
}
+#endif
+#if V3D_VERSION >= 71
+ cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
+ clip.viewport_half_width_in_1_64th_of_pixel = vpscale[0] * 64.0f;
+ clip.viewport_half_height_in_1_64th_of_pixel = vpscale[1] * 64.0f;
+ }
+#endif
float translate_z, scale_z;
v3dv_cmd_buffer_state_get_viewport_z_xform(&cmd_buffer->state, 0,
&translate_z, &scale_z);
+#if V3D_VERSION == 42
cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
clip.viewport_z_offset_zc_to_zs = translate_z;
clip.viewport_z_scale_zc_to_zs = scale_z;
}
+#endif
+
+#if V3D_VERSION >= 71
+   /* If the Z scale is too small, guardband clipping may not clip correctly */
+ if (fabsf(scale_z) < 0.01f) {
+ cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET_NO_GUARDBAND, clip) {
+ clip.viewport_z_offset_zc_to_zs = translate_z;
+ clip.viewport_z_scale_zc_to_zs = scale_z;
+ }
+ } else {
+ cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
+ clip.viewport_z_offset_zc_to_zs = translate_z;
+ clip.viewport_z_scale_zc_to_zs = scale_z;
+ }
+ }
+#endif
+
cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
/* Vulkan's default Z NDC is [0..1]. If 'negative_one_to_one' is enabled,
* we are using OpenGL's [-1, 1] instead.
@@ -1103,8 +1397,28 @@ v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer)
}
cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
- vp.viewport_centre_x_coordinate = vptranslate[0];
- vp.viewport_centre_y_coordinate = vptranslate[1];
+ float vp_fine_x = vptranslate[0];
+ float vp_fine_y = vptranslate[1];
+ int32_t vp_coarse_x = 0;
+ int32_t vp_coarse_y = 0;
+
+ /* The fine coordinates must be unsigned, but coarse can be signed */
+ if (unlikely(vp_fine_x < 0)) {
+ int32_t blocks_64 = DIV_ROUND_UP(fabsf(vp_fine_x), 64);
+ vp_fine_x += 64.0f * blocks_64;
+ vp_coarse_x -= blocks_64;
+ }
+
+ if (unlikely(vp_fine_y < 0)) {
+ int32_t blocks_64 = DIV_ROUND_UP(fabsf(vp_fine_y), 64);
+ vp_fine_y += 64.0f * blocks_64;
+ vp_coarse_y -= blocks_64;
+ }
+
+ vp.fine_x = vp_fine_x;
+ vp.fine_y = vp_fine_y;
+ vp.coarse_x = vp_coarse_x;
+ vp.coarse_y = vp_coarse_y;
}
cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEWPORT;
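A worked example of the fine/coarse split above, with a hypothetical vptranslate[0] of -200.0f:

   int32_t blocks_64 = DIV_ROUND_UP(200.0f, 64);   /* 4                       */
   vp_fine_x   = -200.0f + 64.0f * 4;              /* 56.0f, now non-negative */
   vp_coarse_x = -4;                               /* 64 * -4 + 56 == -200    */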
@@ -1185,8 +1499,10 @@ v3dX(cmd_buffer_emit_depth_bias)(struct v3dv_cmd_buffer *cmd_buffer)
cl_emit(&job->bcl, DEPTH_OFFSET, bias) {
bias.depth_offset_factor = dynamic->depth_bias.slope_factor;
bias.depth_offset_units = dynamic->depth_bias.constant_factor;
+#if V3D_VERSION <= 42
if (pipeline->depth_bias.is_z16)
bias.depth_offset_units *= 256.0f;
+#endif
bias.limit = dynamic->depth_bias.depth_bias_clamp;
}
@@ -1194,6 +1510,38 @@ v3dX(cmd_buffer_emit_depth_bias)(struct v3dv_cmd_buffer *cmd_buffer)
}
void
+v3dX(cmd_buffer_emit_depth_bounds)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ /* No depthBounds support for v42, so this method is empty in that case.
+ *
+ * Note that this method is being called as v3dv_job_init flags all state
+ * as dirty. See FIXME note in v3dv_job_init.
+ */
+
+#if V3D_VERSION >= 71
+ struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+ assert(pipeline);
+
+ if (!pipeline->depth_bounds_test_enabled)
+ return;
+
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(DEPTH_BOUNDS_TEST_LIMITS));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
+ cl_emit(&job->bcl, DEPTH_BOUNDS_TEST_LIMITS, bounds) {
+ bounds.lower_test_limit = dynamic->depth_bounds.min;
+ bounds.upper_test_limit = dynamic->depth_bounds.max;
+ }
+
+ cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_DEPTH_BOUNDS;
+#endif
+}
+
+void
v3dX(cmd_buffer_emit_line_width)(struct v3dv_cmd_buffer *cmd_buffer)
{
struct v3dv_job *job = cmd_buffer->state.job;
@@ -1236,10 +1584,13 @@ v3dX(cmd_buffer_emit_blend)(struct v3dv_cmd_buffer *cmd_buffer)
struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
assert(pipeline);
+ const struct v3d_device_info *devinfo = &cmd_buffer->device->devinfo;
+ const uint32_t max_color_rts = V3D_MAX_RENDER_TARGETS(devinfo->ver);
+
const uint32_t blend_packets_size =
cl_packet_length(BLEND_ENABLES) +
cl_packet_length(BLEND_CONSTANT_COLOR) +
- cl_packet_length(BLEND_CFG) * V3D_MAX_DRAW_BUFFERS;
+ cl_packet_length(BLEND_CFG) * max_color_rts;
v3dv_cl_ensure_space_with_branch(&job->bcl, blend_packets_size);
v3dv_return_if_oom(cmd_buffer, NULL);
@@ -1251,7 +1602,7 @@ v3dX(cmd_buffer_emit_blend)(struct v3dv_cmd_buffer *cmd_buffer)
}
}
- for (uint32_t i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
+ for (uint32_t i = 0; i < max_color_rts; i++) {
if (pipeline->blend.enables & (1 << i))
cl_emit_prepacked(&job->bcl, &pipeline->blend.cfg[i]);
}
@@ -1278,9 +1629,15 @@ v3dX(cmd_buffer_emit_color_write_mask)(struct v3dv_cmd_buffer *cmd_buffer)
struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
+ uint32_t color_write_mask = ~dynamic->color_write_enable |
+ pipeline->blend.color_write_masks;
+#if V3D_VERSION <= 42
+ /* Only 4 RTs */
+ color_write_mask &= 0xffff;
+#endif
+
cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) {
- mask.mask = (~dynamic->color_write_enable |
- pipeline->blend.color_write_masks) & 0xffff;
+ mask.mask = color_write_mask;
}
cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE;
@@ -1571,15 +1928,16 @@ v3dX(cmd_buffer_emit_configuration_bits)(struct v3dv_cmd_buffer *cmd_buffer)
struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
assert(pipeline);
- bool enable_ez = job_update_ez_state(job, pipeline, cmd_buffer);
-
v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CFG_BITS));
v3dv_return_if_oom(cmd_buffer, NULL);
cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) {
+#if V3D_VERSION == 42
+ bool enable_ez = job_update_ez_state(job, pipeline, cmd_buffer);
config.early_z_enable = enable_ez;
config.early_z_updates_enable = config.early_z_enable &&
pipeline->z_updates_enable;
+#endif
}
}
@@ -1825,7 +2183,9 @@ emit_gs_shader_state_record(struct v3dv_job *job,
gs_bin->prog_data.gs->base.threads == 4;
shader.geometry_bin_mode_shader_start_in_final_thread_section =
gs_bin->prog_data.gs->base.single_seg;
+#if V3D_VERSION <= 42
shader.geometry_bin_mode_shader_propagate_nans = true;
+#endif
shader.geometry_bin_mode_shader_uniforms_address =
gs_bin_uniforms;
@@ -1835,21 +2195,23 @@ emit_gs_shader_state_record(struct v3dv_job *job,
gs->prog_data.gs->base.threads == 4;
shader.geometry_render_mode_shader_start_in_final_thread_section =
gs->prog_data.gs->base.single_seg;
+#if V3D_VERSION <= 42
shader.geometry_render_mode_shader_propagate_nans = true;
+#endif
shader.geometry_render_mode_shader_uniforms_address =
gs_render_uniforms;
}
}
static uint8_t
-v3d_gs_output_primitive(enum shader_prim prim_type)
+v3d_gs_output_primitive(enum mesa_prim prim_type)
{
switch (prim_type) {
- case SHADER_PRIM_POINTS:
+ case MESA_PRIM_POINTS:
return GEOMETRY_SHADER_POINTS;
- case SHADER_PRIM_LINE_STRIP:
+ case MESA_PRIM_LINE_STRIP:
return GEOMETRY_SHADER_LINE_STRIP;
- case SHADER_PRIM_TRIANGLE_STRIP:
+ case MESA_PRIM_TRIANGLE_STRIP:
return GEOMETRY_SHADER_TRI_STRIP;
default:
unreachable("Unsupported primitive type");
@@ -2011,10 +2373,12 @@ v3dX(cmd_buffer_emit_gl_shader_state)(struct v3dv_cmd_buffer *cmd_buffer)
pipeline->vpm_cfg.Gv);
}
+#if V3D_VERSION == 42
struct v3dv_bo *default_attribute_values =
pipeline->default_attribute_values != NULL ?
pipeline->default_attribute_values :
pipeline->device->default_attribute_float;
+#endif
cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD,
pipeline->shader_state_record, shader) {
@@ -2040,8 +2404,10 @@ v3dX(cmd_buffer_emit_gl_shader_state)(struct v3dv_cmd_buffer *cmd_buffer)
shader.vertex_shader_uniforms_address = cmd_buffer->state.uniforms.vs;
shader.fragment_shader_uniforms_address = cmd_buffer->state.uniforms.fs;
+#if V3D_VERSION == 42
shader.address_of_default_attribute_values =
v3dv_cl_address(default_attribute_values, 0);
+#endif
shader.any_shader_reads_hardware_written_primitive_id =
(pipeline->has_gs && prog_data_gs->uses_pid) || prog_data_fs->uses_pid;
@@ -2350,40 +2716,3 @@ v3dX(cmd_buffer_emit_indexed_indirect)(struct v3dv_cmd_buffer *cmd_buffer,
buffer->mem_offset + offset);
}
}
-
-void
-v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buffer,
- int rt,
- uint32_t *rt_bpp,
- uint32_t *rt_type,
- uint32_t *rt_clamp)
-{
- const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
-
- assert(state->subpass_idx < state->pass->subpass_count);
- const struct v3dv_subpass *subpass =
- &state->pass->subpasses[state->subpass_idx];
-
- if (rt >= subpass->color_count)
- return;
-
- struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt];
- const uint32_t attachment_idx = attachment->attachment;
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
- return;
-
- assert(attachment_idx < state->framebuffer->attachment_count &&
- attachment_idx < state->attachment_alloc_count);
- struct v3dv_image_view *iview = state->attachments[attachment_idx].image_view;
- assert(vk_format_is_color(iview->vk.format));
-
- assert(iview->plane_count == 1);
- *rt_bpp = iview->planes[0].internal_bpp;
- *rt_type = iview->planes[0].internal_type;
- if (vk_format_is_int(iview->vk.view_format))
- *rt_clamp = V3D_RENDER_TARGET_CLAMP_INT;
- else if (vk_format_is_srgb(iview->vk.view_format))
- *rt_clamp = V3D_RENDER_TARGET_CLAMP_NORM;
- else
- *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
-}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_device.c b/lib/mesa/src/broadcom/vulkan/v3dvx_device.c
index e23598386..1b50d51e1 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dvx_device.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_device.c
@@ -49,8 +49,8 @@ vk_to_v3d_compare_func[] = {
[VK_COMPARE_OP_ALWAYS] = V3D_COMPARE_FUNC_ALWAYS,
};
-
static union pipe_color_union encode_border_color(
+ const struct v3dv_device *device,
const VkSamplerCustomBorderColorCreateInfoEXT *bc_info)
{
const struct util_format_description *desc =
@@ -77,12 +77,28 @@ static union pipe_color_union encode_border_color(
* colors so we need to fix up the swizzle manually for this case.
*/
uint8_t swizzle[4];
- if (v3dv_format_swizzle_needs_reverse(format->planes[0].swizzle) &&
+ const bool v3d_has_reverse_swap_rb_bits =
+ v3dv_texture_shader_state_has_rb_swap_reverse_bits(device);
+ if (!v3d_has_reverse_swap_rb_bits &&
+ v3dv_format_swizzle_needs_reverse(format->planes[0].swizzle) &&
v3dv_format_swizzle_needs_rb_swap(format->planes[0].swizzle)) {
swizzle[0] = PIPE_SWIZZLE_W;
swizzle[1] = PIPE_SWIZZLE_X;
swizzle[2] = PIPE_SWIZZLE_Y;
swizzle[3] = PIPE_SWIZZLE_Z;
+ }
+ /* In v3d 7.x we no longer have a reverse flag for the border color. Instead
+ * we have to use the new reverse and swap_r/b flags in the texture shader
+    * state, which apply the format swizzle automatically when sampling the
+    * border color as well, so we should not apply it manually here.
+ */
+ else if (v3d_has_reverse_swap_rb_bits &&
+ (v3dv_format_swizzle_needs_rb_swap(format->planes[0].swizzle) ||
+ v3dv_format_swizzle_needs_reverse(format->planes[0].swizzle))) {
+ swizzle[0] = PIPE_SWIZZLE_X;
+ swizzle[1] = PIPE_SWIZZLE_Y;
+ swizzle[2] = PIPE_SWIZZLE_Z;
+ swizzle[3] = PIPE_SWIZZLE_W;
} else {
memcpy(swizzle, format->planes[0].swizzle, sizeof (swizzle));
}
@@ -118,7 +134,11 @@ static union pipe_color_union encode_border_color(
(1 << (desc->channel[i].size - 1)) - 1);
}
- /* convert from float to expected format */
+#if V3D_VERSION <= 42
+ /* The TMU in V3D 7.x always takes 32-bit floats and handles conversions
+ * for us. In V3D 4.x we need to manually convert floating point color
+ * values to the expected format.
+ */
if (vk_format_is_srgb(bc_info->format) ||
vk_format_is_compressed(bc_info->format)) {
for (int i = 0; i < 4; i++)
@@ -170,12 +190,14 @@ static union pipe_color_union encode_border_color(
}
}
}
+#endif
return border;
}
void
-v3dX(pack_sampler_state)(struct v3dv_sampler *sampler,
+v3dX(pack_sampler_state)(const struct v3dv_device *device,
+ struct v3dv_sampler *sampler,
const VkSamplerCreateInfo *pCreateInfo,
const VkSamplerCustomBorderColorCreateInfoEXT *bc_info)
{
@@ -217,7 +239,7 @@ v3dX(pack_sampler_state)(struct v3dv_sampler *sampler,
s.border_color_mode = border_color_mode;
if (s.border_color_mode == V3D_BORDER_COLOR_FOLLOWS) {
- union pipe_color_union border = encode_border_color(bc_info);
+ union pipe_color_union border = encode_border_color(device, bc_info);
s.border_color_word_0 = border.ui[0];
s.border_color_word_1 = border.ui[1];
@@ -253,11 +275,13 @@ v3dX(framebuffer_compute_internal_bpp_msaa)(
const struct v3dv_framebuffer *framebuffer,
const struct v3dv_cmd_buffer_attachment_state *attachments,
const struct v3dv_subpass *subpass,
- uint8_t *max_bpp,
+ uint8_t *max_internal_bpp,
+ uint8_t *total_color_bpp,
bool *msaa)
{
STATIC_ASSERT(V3D_INTERNAL_BPP_32 == 0);
- *max_bpp = V3D_INTERNAL_BPP_32;
+ *max_internal_bpp = V3D_INTERNAL_BPP_32;
+ *total_color_bpp = 0;
*msaa = false;
if (subpass) {
@@ -270,8 +294,11 @@ v3dX(framebuffer_compute_internal_bpp_msaa)(
assert(att);
assert(att->plane_count == 1);
- if (att->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT)
- *max_bpp = MAX2(*max_bpp, att->planes[0].internal_bpp);
+ if (att->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
+ const uint32_t internal_bpp = att->planes[0].internal_bpp;
+ *max_internal_bpp = MAX2(*max_internal_bpp, internal_bpp);
+ *total_color_bpp += 4 * v3d_internal_bpp_words(internal_bpp);
+ }
if (att->vk.image->samples > VK_SAMPLE_COUNT_1_BIT)
*msaa = true;
@@ -285,7 +312,6 @@ v3dX(framebuffer_compute_internal_bpp_msaa)(
if (att->vk.image->samples > VK_SAMPLE_COUNT_1_BIT)
*msaa = true;
}
-
return;
}
@@ -295,8 +321,11 @@ v3dX(framebuffer_compute_internal_bpp_msaa)(
assert(att);
assert(att->plane_count == 1);
- if (att->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT)
- *max_bpp = MAX2(*max_bpp, att->planes[0].internal_bpp);
+ if (att->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
+ const uint32_t internal_bpp = att->planes[0].internal_bpp;
+ *max_internal_bpp = MAX2(*max_internal_bpp, internal_bpp);
+ *total_color_bpp += 4 * v3d_internal_bpp_words(internal_bpp);
+ }
if (att->vk.image->samples > VK_SAMPLE_COUNT_1_BIT)
*msaa = true;
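A worked example of the accumulation above: a subpass with one RGBA8 attachment (V3D_INTERNAL_BPP_32, 1 word) and one RGBA16F attachment (V3D_INTERNAL_BPP_64, 2 words) yields:

   *max_internal_bpp = V3D_INTERNAL_BPP_64;
   *total_color_bpp  = 4 * 1 + 4 * 2;   /* 12 bytes of TLB color per pixel */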
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_formats.c b/lib/mesa/src/broadcom/vulkan/v3dvx_formats.c
index 45a1cf65b..2392e8367 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dvx_formats.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_formats.c
@@ -155,6 +155,7 @@ static const struct v3dv_format format_table[] = {
FORMAT(A8B8G8R8_SRGB_PACK32, SRGB8_ALPHA8, RGBA8, SWIZ_XYZW, 16, true), /* RGBA8 sRGB */
FORMAT(A2B10G10R10_UNORM_PACK32,RGB10_A2, RGB10_A2, SWIZ_XYZW, 16, true),
FORMAT(A2B10G10R10_UINT_PACK32, RGB10_A2UI, RGB10_A2UI, SWIZ_XYZW, 16, false),
+ FORMAT(A2R10G10B10_UNORM_PACK32,RGB10_A2, RGB10_A2, SWIZ_ZYXW, 16, true),
FORMAT(E5B9G9R9_UFLOAT_PACK32, NO, RGB9_E5, SWIZ_XYZ1, 16, true),
FORMAT(B10G11R11_UFLOAT_PACK32, R11F_G11F_B10F,R11F_G11F_B10F, SWIZ_XYZ1, 16, true),
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_image.c b/lib/mesa/src/broadcom/vulkan/v3dvx_image.c
index 80a3e5bfd..de984e812 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dvx_image.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_image.c
@@ -76,8 +76,6 @@ pack_texture_shader_state_helper(struct v3dv_device *device,
tex.swizzle_b = v3d_translate_pipe_swizzle(image_view->planes[plane].swizzle[2]);
tex.swizzle_a = v3d_translate_pipe_swizzle(image_view->planes[plane].swizzle[3]);
- tex.reverse_standard_border_color = image_view->planes[plane].channel_reverse;
-
tex.texture_type = image_view->format->planes[plane].tex_type;
if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
@@ -110,8 +108,6 @@ pack_texture_shader_state_helper(struct v3dv_device *device,
tex.array_stride_64_byte_aligned = image->planes[iplane].cube_map_stride / 64;
- tex.srgb = vk_format_is_srgb(image_view->vk.view_format);
-
/* At this point we don't have the job. That's the reason the first
* parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to
* add the bo to the job. This also means that we need to add manually
@@ -122,6 +118,51 @@ pack_texture_shader_state_helper(struct v3dv_device *device,
v3dv_layer_offset(image, 0, image_view->vk.base_array_layer,
iplane);
tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset);
+
+ bool is_srgb = vk_format_is_srgb(image_view->vk.format);
+
+ /* V3D 4.x doesn't have the reverse and swap_r/b bits, so we compose
+ * the reverse and/or swap_r/b swizzle from the format table with the
+       * image view swizzle. This, however, doesn't work for border colors;
+       * for those there is the reverse_standard_border_color bit.
+       *
+       * In v3d 7.x, however, there is no reverse_standard_border_color bit,
+       * since the reverse and swap_r/b bits also affect border colors.
+       * Because of this we must use these bits with reversed and swapped
+       * formats, since that's the only way to ensure correct border colors.
+       * In that case we don't want to program the swizzle as the composition
+       * of the format swizzle and the view swizzle like we do in v3d 4.x,
+       * since the format swizzle is already applied via the reverse and
+       * swap_r/b bits.
+ */
+#if V3D_VERSION == 42
+ tex.srgb = is_srgb;
+ tex.reverse_standard_border_color =
+ image_view->planes[plane].channel_reverse;
+#endif
+#if V3D_VERSION >= 71
+ tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE;
+
+ tex.reverse = image_view->planes[plane].channel_reverse;
+ tex.r_b_swap = image_view->planes[plane].swap_rb;
+
+ if (tex.reverse || tex.r_b_swap) {
+ tex.swizzle_r =
+ v3d_translate_pipe_swizzle(image_view->view_swizzle[0]);
+ tex.swizzle_g =
+ v3d_translate_pipe_swizzle(image_view->view_swizzle[1]);
+ tex.swizzle_b =
+ v3d_translate_pipe_swizzle(image_view->view_swizzle[2]);
+ tex.swizzle_a =
+ v3d_translate_pipe_swizzle(image_view->view_swizzle[3]);
+ }
+
+ tex.chroma_offset_x = 1;
+ tex.chroma_offset_y = 1;
+ /* See comment in XML field definition for rationale of the shifts */
+ tex.texture_base_pointer_cb = base_offset >> 6;
+ tex.texture_base_pointer_cr = base_offset >> 6;
+#endif
}
}
}
@@ -166,7 +207,14 @@ v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device,
assert(buffer_view->format->plane_count == 1);
tex.texture_type = buffer_view->format->planes[0].tex_type;
- tex.srgb = vk_format_is_srgb(buffer_view->vk_format);
+
+ bool is_srgb = vk_format_is_srgb(buffer_view->vk_format);
+#if V3D_VERSION == 42
+ tex.srgb = is_srgb;
+#endif
+#if V3D_VERSION >= 71
+ tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE;
+#endif
/* At this point we don't have the job. That's the reason the first
* parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to
@@ -179,5 +227,13 @@ v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device,
buffer_view->offset;
tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset);
+
+#if V3D_VERSION >= 71
+ tex.chroma_offset_x = 1;
+ tex.chroma_offset_y = 1;
+ /* See comment in XML field definition for rationale of the shifts */
+ tex.texture_base_pointer_cb = base_offset >> 6;
+ tex.texture_base_pointer_cr = base_offset >> 6;
+#endif
}
}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_meta_common.c b/lib/mesa/src/broadcom/vulkan/v3dvx_meta_common.c
index 04147b82c..858096f9e 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dvx_meta_common.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_meta_common.c
@@ -26,6 +26,7 @@
#include "broadcom/common/v3d_macros.h"
#include "broadcom/common/v3d_tfu.h"
+#include "broadcom/common/v3d_util.h"
#include "broadcom/cle/v3dx_pack.h"
#include "broadcom/compiler/v3d_compiler.h"
@@ -58,12 +59,25 @@ emit_rcl_prologue(struct v3dv_job *job,
config.number_of_render_targets = 1;
config.multisample_mode_4x = tiling->msaa;
config.double_buffer_in_non_ms_mode = tiling->double_buffer;
+#if V3D_VERSION == 42
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
+#endif
+#if V3D_VERSION >= 71
+ config.log2_tile_width = log2_tile_size(tiling->tile_width);
+ config.log2_tile_height = log2_tile_size(tiling->tile_height);
+   /* FIXME: ideally we would like this assert on the packet header (as it
+    * is generic, so it also applies to GL). We would need to expand
+ * gen_pack_header for that.
+ */
+ assert(config.log2_tile_width == config.log2_tile_height ||
+ config.log2_tile_width == config.log2_tile_height + 1);
+#endif
config.internal_depth_type = fb->internal_depth_type;
}
+ const uint32_t *color = NULL;
if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
- uint32_t clear_pad = 0;
+ UNUSED uint32_t clear_pad = 0;
if (clear_info->image) {
const struct v3dv_image *image = clear_info->image;
@@ -88,7 +102,9 @@ emit_rcl_prologue(struct v3dv_job *job,
}
}
- const uint32_t *color = &clear_info->clear_value->color[0];
+ color = &clear_info->clear_value->color[0];
+
+#if V3D_VERSION == 42
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
clear.clear_color_low_32_bits = color[0];
clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
@@ -112,13 +128,49 @@ emit_rcl_prologue(struct v3dv_job *job,
clear.render_target_number = 0;
};
}
+#endif
}
+#if V3D_VERSION == 42
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
rt.render_target_0_internal_bpp = tiling->internal_bpp;
rt.render_target_0_internal_type = fb->internal_type;
rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
}
+#endif
+
+#if V3D_VERSION >= 71
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
+ if (color)
+ rt.clear_color_low_bits = color[0];
+ rt.internal_bpp = tiling->internal_bpp;
+ rt.internal_type_and_clamping = v3dX(clamp_for_format_and_type)(fb->internal_type,
+ fb->vk_format);
+ rt.stride =
+ v3d_compute_rt_row_row_stride_128_bits(tiling->tile_width,
+ v3d_internal_bpp_words(rt.internal_bpp));
+ rt.base_address = 0;
+ rt.render_target_number = 0;
+ }
+
+ if (color && tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART2, rt) {
+ rt.clear_color_mid_bits = /* 40 bits (32 + 8) */
+ ((uint64_t) color[1]) |
+ (((uint64_t) (color[2] & 0xff)) << 32);
+ rt.render_target_number = 0;
+ }
+ }
+
+ if (color && tiling->internal_bpp >= V3D_INTERNAL_BPP_128) {
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART3, rt) {
+ rt.clear_color_top_bits = /* 56 bits (24 + 32) */
+ (((uint64_t) (color[2] & 0xffffff00)) >> 8) |
+ (((uint64_t) (color[3])) << 24);
+ rt.render_target_number = 0;
+ }
+ }
+#endif
cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
@@ -179,10 +231,15 @@ emit_frame_setup(struct v3dv_job *job,
*/
if (clear_value &&
(i == 0 || v3dv_do_double_initial_tile_clear(tiling))) {
+#if V3D_VERSION == 42
cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
clear.clear_z_stencil_buffer = true;
clear.clear_all_render_targets = true;
}
+#endif
+#if V3D_VERSION >= 71
+ cl_emit(rcl, CLEAR_RENDER_TARGETS, clear);
+#endif
}
cl_emit(rcl, END_OF_TILE_MARKER, end);
}
@@ -893,6 +950,7 @@ v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer,
tfu.iia |= src_offset;
+#if V3D_VERSION <= 42
if (src_tiling == V3D_TILING_RASTER) {
tfu.icfg = V3D33_TFU_ICFG_FORMAT_RASTER << V3D33_TFU_ICFG_FORMAT_SHIFT;
} else {
@@ -901,12 +959,46 @@ v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer,
V3D33_TFU_ICFG_FORMAT_SHIFT;
}
tfu.icfg |= format_plane->tex_type << V3D33_TFU_ICFG_TTYPE_SHIFT;
+#endif
+#if V3D_VERSION >= 71
+ if (src_tiling == V3D_TILING_RASTER) {
+ tfu.icfg = V3D71_TFU_ICFG_FORMAT_RASTER << V3D71_TFU_ICFG_IFORMAT_SHIFT;
+ } else {
+ tfu.icfg = (V3D71_TFU_ICFG_FORMAT_LINEARTILE +
+ (src_tiling - V3D_TILING_LINEARTILE)) <<
+ V3D71_TFU_ICFG_IFORMAT_SHIFT;
+ }
+ tfu.icfg |= format_plane->tex_type << V3D71_TFU_ICFG_OTYPE_SHIFT;
+#endif
tfu.ioa = dst_offset;
+#if V3D_VERSION <= 42
tfu.ioa |= (V3D33_TFU_IOA_FORMAT_LINEARTILE +
(dst_tiling - V3D_TILING_LINEARTILE)) <<
V3D33_TFU_IOA_FORMAT_SHIFT;
+#endif
+
+#if V3D_VERSION >= 71
+ tfu.v71.ioc = (V3D71_TFU_IOC_FORMAT_LINEARTILE +
+ (dst_tiling - V3D_TILING_LINEARTILE)) <<
+ V3D71_TFU_IOC_FORMAT_SHIFT;
+
+ switch (dst_tiling) {
+ case V3D_TILING_UIF_NO_XOR:
+ case V3D_TILING_UIF_XOR:
+ tfu.v71.ioc |=
+ (dst_padded_height_or_stride / (2 * v3d_utile_height(dst_cpp))) <<
+ V3D71_TFU_IOC_STRIDE_SHIFT;
+ break;
+ case V3D_TILING_RASTER:
+ tfu.v71.ioc |= (dst_padded_height_or_stride / dst_cpp) <<
+ V3D71_TFU_IOC_STRIDE_SHIFT;
+ break;
+ default:
+ break;
+ }
+#endif
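A note on what the two stride cases above encode (an assumption read off the divisions): for UIF outputs the stride is measured in UIF-block rows, where a UIF block is two utiles tall, hence the division by 2 * v3d_utile_height(dst_cpp); for raster outputs it is measured in pixels, hence the division of the byte stride by dst_cpp.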
switch (src_tiling) {
case V3D_TILING_UIF_NO_XOR:
@@ -923,6 +1015,7 @@ v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer,
/* The TFU can handle raster sources but always produces UIF results */
assert(dst_tiling != V3D_TILING_RASTER);
+#if V3D_VERSION <= 42
/* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
* OPAD field for the destination (how many extra UIF blocks beyond
* those necessary to cover the height).
@@ -934,6 +1027,7 @@ v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer,
uif_block_h;
tfu.icfg |= icfg << V3D33_TFU_ICFG_OPAD_SHIFT;
}
+#endif
v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
}
@@ -1314,8 +1408,9 @@ v3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t width, height;
framebuffer_size_for_pixel_count(num_items, &width, &height);
- v3dv_job_start_frame(job, width, height, 1, true, true,
- 1, internal_bpp, false);
+ v3dv_job_start_frame(job, width, height, 1, true, true, 1,
+ internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
+ false);
struct v3dv_meta_framebuffer framebuffer;
v3dX(meta_framebuffer_init)(&framebuffer, vk_format, internal_type,
@@ -1361,8 +1456,9 @@ v3dX(meta_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t width, height;
framebuffer_size_for_pixel_count(num_items, &width, &height);
- v3dv_job_start_frame(job, width, height, 1, true, true,
- 1, internal_bpp, false);
+ v3dv_job_start_frame(job, width, height, 1, true, true, 1,
+ internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
+ false);
struct v3dv_meta_framebuffer framebuffer;
v3dX(meta_framebuffer_init)(&framebuffer, VK_FORMAT_R8G8B8A8_UINT,
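For context on the new argument: v3d_internal_bpp_words() maps V3D_INTERNAL_BPP_32/64/128 to 1/2/4 32-bit words, so the 4 * v3d_internal_bpp_words(internal_bpp) passed to v3dv_job_start_frame() above is the total color bytes per pixel (4, 8 or 16) for the single render target these meta jobs use.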
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_pipeline.c b/lib/mesa/src/broadcom/vulkan/v3dvx_pipeline.c
index 45aec2623..ad22add15 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dvx_pipeline.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_pipeline.c
@@ -223,14 +223,49 @@ pack_cfg_bits(struct v3dv_pipeline *pipeline,
config.depth_test_function = VK_COMPARE_OP_ALWAYS;
}
- /* EZ state will be updated at draw time based on bound pipeline state */
- config.early_z_updates_enable = false;
- config.early_z_enable = false;
-
config.stencil_enable =
ds_info ? ds_info->stencilTestEnable && has_ds_attachment: false;
pipeline->z_updates_enable = config.z_updates_enable;
+
+#if V3D_VERSION >= 71
+ /* From the Vulkan spec:
+ *
+ * "depthClampEnable controls whether to clamp the fragment’s depth
+ * values as described in Depth Test. If the pipeline is not created
+ * with VkPipelineRasterizationDepthClipStateCreateInfoEXT present
+ * then enabling depth clamp will also disable clipping primitives to
+ * the z planes of the frustrum as described in Primitive Clipping.
+    *    the z planes of the frustum as described in Primitive Clipping.
+ * VkPipelineRasterizationDepthClipStateCreateInfoEXT."
+ *
+ * Note: neither depth clamping nor VK_EXT_depth_clip_enable are actually
+ * supported in the driver yet, so in practice we are always enabling Z
+ * clipping for now.
+ */
+ bool z_clamp_enable = rs_info && rs_info->depthClampEnable;
+ bool z_clip_enable = false;
+ const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
+ ds_info ? vk_find_struct_const(ds_info->pNext,
+ PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT) :
+ NULL;
+ if (clip_info)
+ z_clip_enable = clip_info->depthClipEnable;
+ else if (!z_clamp_enable)
+ z_clip_enable = true;
+
+ if (z_clip_enable) {
+ config.z_clipping_mode = pipeline->negative_one_to_one ?
+ V3D_Z_CLIP_MODE_MIN_ONE_TO_ONE : V3D_Z_CLIP_MODE_ZERO_TO_ONE;
+ } else {
+ config.z_clipping_mode = V3D_Z_CLIP_MODE_NONE;
+ }
+
+ config.z_clamp_mode = z_clamp_enable;
+
+ config.depth_bounds_test_enable =
+ ds_info && ds_info->depthBoundsTestEnable && has_ds_attachment;
+#endif
};
}
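The clip/clamp combinations produced by the v71 block above, summarized (rows follow directly from the code):

   depthClampEnable | depth clip EXT struct | z_clipping_mode     | z_clamp_mode
   -----------------+-----------------------+---------------------+-------------
   false            | absent                | MIN_ONE/ZERO_TO_ONE | off
   true             | absent                | NONE                | on
   either           | present               | per depthClipEnable | = clamp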
@@ -364,7 +399,7 @@ v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline,
static void
pack_shader_state_record(struct v3dv_pipeline *pipeline)
{
- assert(sizeof(pipeline->shader_state_record) ==
+ assert(sizeof(pipeline->shader_state_record) >=
cl_packet_length(GL_SHADER_STATE_RECORD));
struct v3d_fs_prog_data *prog_data_fs =
@@ -388,7 +423,7 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline)
if (!pipeline->has_gs) {
shader.point_size_in_shaded_vertex_data =
- pipeline->topology == PIPE_PRIM_POINTS;
+ pipeline->topology == MESA_PRIM_POINTS;
} else {
struct v3d_gs_prog_data *prog_data_gs =
pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]->prog_data.gs;
@@ -439,15 +474,16 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline)
shader.number_of_varyings_in_fragment_shader =
prog_data_fs->num_inputs;
- shader.coordinate_shader_propagate_nans = true;
- shader.vertex_shader_propagate_nans = true;
- shader.fragment_shader_propagate_nans = true;
-
/* Note: see previous note about addresses */
/* shader.coordinate_shader_code_address */
/* shader.vertex_shader_code_address */
/* shader.fragment_shader_code_address */
+#if V3D_VERSION == 42
+ shader.coordinate_shader_propagate_nans = true;
+ shader.vertex_shader_propagate_nans = true;
+ shader.fragment_shader_propagate_nans = true;
+
/* FIXME: Use combined input/output size flag in the common case (also
* on v3d, see v3dx_draw).
*/
@@ -455,13 +491,25 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline)
prog_data_vs_bin->separate_segments;
shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
prog_data_vs->separate_segments;
-
shader.coordinate_shader_input_vpm_segment_size =
prog_data_vs_bin->separate_segments ?
prog_data_vs_bin->vpm_input_size : 1;
shader.vertex_shader_input_vpm_segment_size =
prog_data_vs->separate_segments ?
prog_data_vs->vpm_input_size : 1;
+#endif
+
+   /* On V3D 7.1 there isn't a specific flag to select shared or separate
+    * input/output segments. We just set vpm_input_size to 0 and the output
+    * size to the maximum needed; those values should already be set properly
+    * in prog_data_vs_bin.
+ */
+#if V3D_VERSION == 71
+ shader.coordinate_shader_input_vpm_segment_size =
+ prog_data_vs_bin->vpm_input_size;
+ shader.vertex_shader_input_vpm_segment_size =
+ prog_data_vs->vpm_input_size;
+#endif
shader.coordinate_shader_output_vpm_segment_size =
prog_data_vs_bin->vpm_output_size;
@@ -663,3 +711,76 @@ v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
}
}
}
+
+#if V3D_VERSION == 42
+static bool
+pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
+{
+ for (uint8_t i = 0; i < pipeline->va_count; i++) {
+ if (vk_format_is_int(pipeline->va[i].vk_format))
+ return true;
+ }
+ return false;
+}
+#endif
+
+bool
+v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline)
+{
+#if V3D_VERSION == 42
+ return pipeline_has_integer_vertex_attrib(pipeline);
+#endif
+
+ return false;
+}
+
+/* @pipeline can be NULL, in which case we assume the most common case. For
+ * example, for v42 we then assume that all the attributes have a float
+ * format (we create an all-float BO only once and reuse it with all-float
+ * pipelines); otherwise we look at the actual type of each attribute used
+ * with the specific pipeline passed in.
+ */
+struct v3dv_bo *
+v3dX(create_default_attribute_values)(struct v3dv_device *device,
+ struct v3dv_pipeline *pipeline)
+{
+#if V3D_VERSION >= 71
+ return NULL;
+#endif
+
+ uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
+ struct v3dv_bo *bo;
+
+ bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
+
+ if (!bo) {
+ fprintf(stderr, "failed to allocate memory for the default "
+ "attribute values\n");
+ return NULL;
+ }
+
+ bool ok = v3dv_bo_map(device, bo, size);
+ if (!ok) {
+ fprintf(stderr, "failed to map default attribute values buffer\n");
+ return NULL;
+ }
+
+ uint32_t *attrs = bo->map;
+ uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
+ for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
+ attrs[i * 4 + 0] = 0;
+ attrs[i * 4 + 1] = 0;
+ attrs[i * 4 + 2] = 0;
+ VkFormat attr_format =
+ pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
+ if (i < va_count && vk_format_is_int(attr_format)) {
+ attrs[i * 4 + 3] = 1;
+ } else {
+ attrs[i * 4 + 3] = fui(1.0);
+ }
+ }
+
+ v3dv_bo_unmap(device, bo);
+
+ return bo;
+}
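The resulting BO layout is MAX_VERTEX_ATTRIBS vec4 slots of (0, 0, 0, 1), where the w component is the integer 1 for integer attributes and the bit pattern of 1.0f (fui(1.0), i.e. 0x3f800000) for everything else, matching the conventional default attribute value of (0, 0, 0, 1).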
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_private.h b/lib/mesa/src/broadcom/vulkan/v3dvx_private.h
index c693952d0..0f5887eab 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dvx_private.h
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_private.h
@@ -55,6 +55,9 @@ void
v3dX(cmd_buffer_emit_depth_bias)(struct v3dv_cmd_buffer *cmd_buffer);
void
+v3dX(cmd_buffer_emit_depth_bounds)(struct v3dv_cmd_buffer *cmd_buffer);
+
+void
v3dX(cmd_buffer_emit_line_width)(struct v3dv_cmd_buffer *cmd_buffer);
void
@@ -125,17 +128,11 @@ v3dX(get_hw_clear_color)(const VkClearColorValue *color,
uint32_t internal_size,
uint32_t *hw_color);
-void
-v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buffer,
- int rt,
- uint32_t *rt_bpp,
- uint32_t *rt_type,
- uint32_t *rt_clamp);
-
/* Used at v3dv_device */
void
-v3dX(pack_sampler_state)(struct v3dv_sampler *sampler,
+v3dX(pack_sampler_state)(const struct v3dv_device *device,
+ struct v3dv_sampler *sampler,
const VkSamplerCreateInfo *pCreateInfo,
const VkSamplerCustomBorderColorCreateInfoEXT *bc_info);
@@ -143,7 +140,9 @@ void
v3dX(framebuffer_compute_internal_bpp_msaa)(const struct v3dv_framebuffer *framebuffer,
const struct v3dv_cmd_buffer_attachment_state *attachments,
const struct v3dv_subpass *subpass,
- uint8_t *max_bpp, bool *msaa);
+ uint8_t *max_internal_bpp,
+ uint8_t *total_color_bpp,
+ bool *msaa);
#ifdef DEBUG
void
@@ -165,6 +164,10 @@ v3dX(format_supports_tlb_resolve)(const struct v3dv_format *format);
bool
v3dX(format_supports_blending)(const struct v3dv_format *format);
+/* FIXME: tex_format should be `enum V3DX(Texture_Data_Formats)`, but using
+ * that enum type in the header requires including v3dx_pack.h, which triggers
+ * circular include dependencies issues, so we're using a `uint32_t` for now.
+ */
bool
v3dX(tfu_supports_tex_format)(uint32_t tex_format);
@@ -309,10 +312,24 @@ void
v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
const VkPipelineVertexInputStateCreateInfo *vi_info,
const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info);
+
+bool
+v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline);
+
+struct v3dv_bo *
+v3dX(create_default_attribute_values)(struct v3dv_device *device,
+ struct v3dv_pipeline *pipeline);
+
/* Used at v3dv_queue */
void
v3dX(job_emit_noop)(struct v3dv_job *job);
+/* Used at v3dv_query */
+VkResult
+v3dX(enumerate_performance_query_counters)(uint32_t *pCounterCount,
+ VkPerformanceCounterKHR *pCounters,
+ VkPerformanceCounterDescriptionKHR *pCounterDescriptions);
+
/* Used at v3dv_descriptor_set, and other descriptor set utils */
uint32_t v3dX(descriptor_bo_size)(VkDescriptorType type);
@@ -321,3 +338,21 @@ uint32_t v3dX(max_descriptor_bo_size)(void);
uint32_t v3dX(combined_image_sampler_texture_state_offset)(uint8_t plane);
uint32_t v3dX(combined_image_sampler_sampler_state_offset)(uint8_t plane);
+
+/* General utils */
+
+uint32_t
+v3dX(clamp_for_format_and_type)(uint32_t rt_type,
+ VkFormat vk_format);
+
+#define V3D42_CLIPPER_XY_GRANULARITY 256.0f
+#define V3D71_CLIPPER_XY_GRANULARITY 64.0f
+
+void
+v3dX(viewport_compute_xform)(const VkViewport *viewport,
+ float scale[3],
+ float translate[3]);
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_query.c b/lib/mesa/src/broadcom/vulkan/v3dvx_query.c
new file mode 100644
index 000000000..e59a1e84f
--- /dev/null
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_query.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright © 2023 Raspberry Pi Ltd
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "v3dv_private.h"
+
+#include "common/v3d_performance_counters.h"
+
+VkResult
+v3dX(enumerate_performance_query_counters)(uint32_t *pCounterCount,
+ VkPerformanceCounterKHR *pCounters,
+ VkPerformanceCounterDescriptionKHR *pCounterDescriptions)
+{
+ uint32_t desc_count = *pCounterCount;
+
+ VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR,
+ out, pCounters, pCounterCount);
+ VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR,
+ out_desc, pCounterDescriptions, &desc_count);
+
+ for (int i = 0; i < ARRAY_SIZE(v3d_performance_counters); i++) {
+ vk_outarray_append_typed(VkPerformanceCounterKHR, &out, counter) {
+ counter->unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR;
+ counter->scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR;
+ counter->storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR;
+
+ unsigned char sha1_result[20];
+ _mesa_sha1_compute(v3d_performance_counters[i][V3D_PERFCNT_NAME],
+ strlen(v3d_performance_counters[i][V3D_PERFCNT_NAME]),
+ sha1_result);
+
+ memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
+ }
+
+ vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR,
+ &out_desc, desc) {
+ desc->flags = 0;
+ snprintf(desc->name, sizeof(desc->name), "%s",
+ v3d_performance_counters[i][V3D_PERFCNT_NAME]);
+ snprintf(desc->category, sizeof(desc->category), "%s",
+ v3d_performance_counters[i][V3D_PERFCNT_CATEGORY]);
+ snprintf(desc->description, sizeof(desc->description), "%s",
+ v3d_performance_counters[i][V3D_PERFCNT_DESCRIPTION]);
+ }
+ }
+
+ return vk_outarray_status(&out);
+}
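A usage sketch following the usual Vulkan two-call idiom (variable names hypothetical):

   uint32_t count = 0;
   v3dX(enumerate_performance_query_counters)(&count, NULL, NULL);   /* count only */

   VkPerformanceCounterKHR *counters = calloc(count, sizeof(*counters));
   VkPerformanceCounterDescriptionKHR *descs = calloc(count, sizeof(*descs));
   v3dX(enumerate_performance_query_counters)(&count, counters, descs);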
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_queue.c b/lib/mesa/src/broadcom/vulkan/v3dvx_queue.c
index efe63de42..6eed2de9d 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dvx_queue.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_queue.c
@@ -29,7 +29,8 @@
void
v3dX(job_emit_noop)(struct v3dv_job *job)
{
- v3dv_job_start_frame(job, 1, 1, 1, true, true, 1, V3D_INTERNAL_BPP_32, false);
+ v3dv_job_start_frame(job, 1, 1, 1, true, true, 1,
+ V3D_INTERNAL_BPP_32, 4, false);
v3dX(job_emit_binning_flush)(job);
struct v3dv_cl *rcl = &job->rcl;
@@ -42,14 +43,29 @@ v3dX(job_emit_noop)(struct v3dv_job *job)
config.image_height_pixels = 1;
config.number_of_render_targets = 1;
config.multisample_mode_4x = false;
+#if V3D_VERSION == 42
config.maximum_bpp_of_all_render_targets = V3D_INTERNAL_BPP_32;
+#endif
+#if V3D_VERSION >= 71
+ config.log2_tile_width = 3; /* Tile size 64 */
+ config.log2_tile_height = 3; /* Tile size 64 */
+#endif
}
+#if V3D_VERSION == 42
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
rt.render_target_0_internal_bpp = V3D_INTERNAL_BPP_32;
rt.render_target_0_internal_type = V3D_INTERNAL_TYPE_8;
rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
}
+#endif
+#if V3D_VERSION >= 71
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
+ rt.internal_bpp = V3D_INTERNAL_BPP_32;
+ rt.internal_type_and_clamping = V3D_RENDER_TARGET_TYPE_CLAMP_8;
+ rt.stride = 1; /* Unused RT */
+ }
+#endif
cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
clear.z_clear_value = 1.0f;