author    | Jonathan Gray <jsg@cvs.openbsd.org> | 2020-01-22 02:13:18 +0000
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2020-01-22 02:13:18 +0000
commit    | fdcc03929065b5bf5dd93553db219ea3e05c8c34 (patch)
tree      | ca90dc8d9e89febdcd4160956c1b8ec098a4efc9 /lib/mesa/src/freedreno/vulkan
parent    | 3c9de4a7e13712b5696750bbd59a18c848742022 (diff)
Import Mesa 19.2.8
Diffstat (limited to 'lib/mesa/src/freedreno/vulkan')
36 files changed, 16671 insertions, 0 deletions
diff --git a/lib/mesa/src/freedreno/vulkan/.clang-format b/lib/mesa/src/freedreno/vulkan/.clang-format
new file mode 100644
index 000000000..b14b5dca7
--- /dev/null
+++ b/lib/mesa/src/freedreno/vulkan/.clang-format
@@ -0,0 +1,31 @@
+BasedOnStyle: LLVM
+AlwaysBreakAfterReturnType: TopLevel
+BinPackParameters: false
+BraceWrapping:
+  AfterControlStatement: false
+  AfterEnum: true
+  AfterFunction: true
+  AfterStruct: true
+  BeforeElse: false
+  SplitEmptyFunction: true
+BreakBeforeBraces: Custom
+ColumnLimit: 78
+ContinuationIndentWidth: 3
+Cpp11BracedListStyle: false
+IncludeBlocks: Regroup
+IncludeCategories:
+  - Regex: '^"tu_private.h"$'
+    Priority: 0
+  - Regex: '^"(drm/|ir3/|tu_)'
+    Priority: 4
+  - Regex: '^"(c11/|compiler/|main/|nir/|spirv/|util/|vk_|wsi_)'
+    Priority: 3
+  - Regex: '^<(vulkan/)'
+    Priority: 2
+  - Regex: '.*'
+    Priority: 1
+IndentWidth: 3
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyExcessCharacter: 100
+SpaceAfterCStyleCast: true
+SpaceBeforeCpp11BracedList: true
diff --git a/lib/mesa/src/freedreno/vulkan/TODO b/lib/mesa/src/freedreno/vulkan/TODO
new file mode 100644
index 000000000..ff02bdefb
--- /dev/null
+++ b/lib/mesa/src/freedreno/vulkan/TODO
@@ -0,0 +1 @@
+- git grep TODO src/freedreno/vulkan
diff --git a/lib/mesa/src/freedreno/vulkan/meson.build b/lib/mesa/src/freedreno/vulkan/meson.build
new file mode 100644
index 000000000..03079a149
--- /dev/null
+++ b/lib/mesa/src/freedreno/vulkan/meson.build
@@ -0,0 +1,145 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+tu_entrypoints = custom_target(
+  'tu_entrypoints.[ch]',
+  input : ['tu_entrypoints_gen.py', vk_api_xml],
+  output : ['tu_entrypoints.h', 'tu_entrypoints.c'],
+  command : [
+    prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--outdir',
+    meson.current_build_dir()
+  ],
+  depend_files : files('tu_extensions.py'),
+)
+
+tu_extensions_c = custom_target(
+  'tu_extensions.c',
+  input : ['tu_extensions.py', vk_api_xml],
+  output : ['tu_extensions.c', 'tu_extensions.h'],
+  command : [
+    prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--out-c', '@OUTPUT0@',
+    '--out-h', '@OUTPUT1@'
+  ],
+)
+
+tu_format_table_c = custom_target(
+  'tu_format_table.c',
+  input : ['vk_format_table.py', 'vk_format_layout.csv'],
+  output : 'vk_format_table.c',
+  command : [prog_python, '@INPUT@'],
+  depend_files : files('vk_format_parse.py'),
+  capture : true,
+)
+
+libtu_files = files(
+  'tu_cmd_buffer.c',
+  'tu_cs.c',
+  'tu_device.c',
+  'tu_descriptor_set.c',
+  'tu_descriptor_set.h',
+  'tu_drm.c',
+  'tu_fence.c',
+  'tu_formats.c',
+  'tu_image.c',
+  'tu_meta_blit.c',
+  'tu_meta_buffer.c',
+  'tu_meta_clear.c',
+  'tu_meta_copy.c',
+  'tu_meta_resolve.c',
+  'tu_pass.c',
+  'tu_pipeline.c',
+  'tu_pipeline_cache.c',
+  'tu_private.h',
+  'tu_query.c',
+  'tu_shader.c',
+  'tu_util.c',
+  'tu_util.h',
+  'tu_wsi.c',
+  'vk_format.h',
+)
+
+tu_deps = []
+tu_flags = []
+
+if with_platform_wayland
+  tu_deps += dep_wayland_client
+  tu_flags += '-DVK_USE_PLATFORM_WAYLAND_KHR'
+  libtu_files += files('tu_wsi_wayland.c')
+endif
+
+libvulkan_freedreno = shared_library(
+  'vulkan_freedreno',
+  [libtu_files, tu_entrypoints, tu_extensions_c, tu_format_table_c, freedreno_xml_header_files],
+  include_directories : [
+    inc_common,
+    inc_compiler,
+    inc_vulkan_wsi,
+    inc_freedreno,
+  ],
+  link_with : [
+    libvulkan_wsi,
+    libfreedreno_drm, # required by ir3_shader_get_variant, which we don't use
+    libfreedreno_ir3,
+  ],
+  dependencies : [
+    dep_dl,
+    dep_elf,
+    dep_libdrm,
+    dep_llvm,
+    dep_m,
+    dep_thread,
+    dep_valgrind,
+    idep_nir,
+    tu_deps,
+    idep_vulkan_util,
+    idep_mesautil,
+  ],
+  c_args : [c_vis_args, no_override_init_args, tu_flags],
+  link_args : [ld_args_bsymbolic, ld_args_gc_sections],
+  install : true,
+)
+
+if with_tests and prog_nm.found()
+  test(
+    'tu symbols check',
+    symbols_check,
+    args : [
+      '--lib', libvulkan_freedreno,
+      '--symbols-file', vulkan_icd_symbols,
+      '--nm', prog_nm.path(),
+    ],
+    suite : ['freedreno'],
+  )
+endif
+
+freedreno_icd = custom_target(
+  'freedreno_icd',
+  input : 'tu_icd.py',
+  output : 'freedreno_icd.@0@.json'.format(host_machine.cpu()),
+  command : [
+    prog_python, '@INPUT@',
+    '--lib-path', join_paths(get_option('prefix'), get_option('libdir')),
+    '--out', '@OUTPUT@',
+  ],
+  depend_files : files('tu_extensions.py'),
+  build_by_default : true,
+  install_dir : with_vulkan_icd_dir,
+  install : true,
+)
diff --git a/lib/mesa/src/freedreno/vulkan/tu_android.c b/lib/mesa/src/freedreno/vulkan/tu_android.c
new file mode 100644
index 000000000..1ebc9e726
--- /dev/null
+++ b/lib/mesa/src/freedreno/vulkan/tu_android.c
@@ -0,0 +1,382 @@
+/*
+ * Copyright © 2017, Google Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "tu_private.h"
+
+#include <hardware/gralloc.h>
+#include <hardware/hardware.h>
+#include <hardware/hwvulkan.h>
+#include <libsync.h>
+
+#include <vulkan/vk_android_native_buffer.h>
+#include <vulkan/vk_icd.h>
+
+static int
+tu_hal_open(const struct hw_module_t *mod,
+            const char *id,
+            struct hw_device_t **dev);
+static int
+tu_hal_close(struct hw_device_t *dev);
+
+static void UNUSED
+static_asserts(void)
+{
+   STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC);
+}
+
+PUBLIC struct hwvulkan_module_t HAL_MODULE_INFO_SYM = {
+   .common =
+     {
+       .tag = HARDWARE_MODULE_TAG,
+       .module_api_version = HWVULKAN_MODULE_API_VERSION_0_1,
+       .hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0),
+       .id = HWVULKAN_HARDWARE_MODULE_ID,
+       .name = "AMD Vulkan HAL",
+       .author = "Google",
+       .methods =
+         &(hw_module_methods_t){
+           .open = tu_hal_open,
+         },
+     },
+};
+
+/* If any bits in test_mask are set, then unset them and return true. */
+static inline bool
+unmask32(uint32_t *inout_mask, uint32_t test_mask)
+{
+   uint32_t orig_mask = *inout_mask;
+   *inout_mask &= ~test_mask;
+   return *inout_mask != orig_mask;
+}
+
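(Illustration, not part of the import: unmask32() clears the requested bits from *inout_mask and reports whether any of them were set, which is how tu_GetSwapchainGrallocUsageANDROID() further down translates VkImageUsageFlags into gralloc usage bits one group at a time. A minimal sketch with a hypothetical starting mask:)

   uint32_t usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;

   /* true: the sampled bit was set and has now been cleared */
   bool texture = unmask32(&usage, VK_IMAGE_USAGE_SAMPLED_BIT |
                                   VK_IMAGE_USAGE_STORAGE_BIT);

   /* false: no color-attachment bit was present in the mask */
   bool render = unmask32(&usage, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);

   /* usage is now VK_IMAGE_USAGE_TRANSFER_SRC_BIT only */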
+static int
+tu_hal_open(const struct hw_module_t *mod,
+            const char *id,
+            struct hw_device_t **dev)
+{
+   assert(mod == &HAL_MODULE_INFO_SYM.common);
+   assert(strcmp(id, HWVULKAN_DEVICE_0) == 0);
+
+   hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev));
+   if (!hal_dev)
+      return -1;
+
+   *hal_dev = (hwvulkan_device_t){
+      .common =
+        {
+          .tag = HARDWARE_DEVICE_TAG,
+          .version = HWVULKAN_DEVICE_API_VERSION_0_1,
+          .module = &HAL_MODULE_INFO_SYM.common,
+          .close = tu_hal_close,
+        },
+      .EnumerateInstanceExtensionProperties =
+        tu_EnumerateInstanceExtensionProperties,
+      .CreateInstance = tu_CreateInstance,
+      .GetInstanceProcAddr = tu_GetInstanceProcAddr,
+   };
+
+   *dev = &hal_dev->common;
+   return 0;
+}
+
+static int
+tu_hal_close(struct hw_device_t *dev)
+{
+   /* hwvulkan.h claims that hw_device_t::close() is never called. */
+   return -1;
+}
+
+VkResult
+tu_image_from_gralloc(VkDevice device_h,
+                      const VkImageCreateInfo *base_info,
+                      const VkNativeBufferANDROID *gralloc_info,
+                      const VkAllocationCallbacks *alloc,
+                      VkImage *out_image_h)
+
+{
+   TU_FROM_HANDLE(tu_device, device, device_h);
+   VkImage image_h = VK_NULL_HANDLE;
+   struct tu_image *image = NULL;
+   struct tu_bo *bo = NULL;
+   VkResult result;
+
+   result = tu_image_create(
+      device_h,
+      &(struct tu_image_create_info) {
+         .vk_info = base_info, .scanout = true, .no_metadata_planes = true },
+      alloc, &image_h);
+
+   if (result != VK_SUCCESS)
+      return result;
+
+   if (gralloc_info->handle->numFds != 1) {
+      return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE,
+                       "VkNativeBufferANDROID::handle::numFds is %d, "
+                       "expected 1",
+                       gralloc_info->handle->numFds);
+   }
+
+   /* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf
+    * must exceed that of the gralloc handle, and we do not own the gralloc
+    * handle.
+    */
+   int dma_buf = gralloc_info->handle->data[0];
+
+   image = tu_image_from_handle(image_h);
+
+   VkDeviceMemory memory_h;
+
+   const VkMemoryDedicatedAllocateInfo ded_alloc = {
+      .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+      .pNext = NULL,
+      .buffer = VK_NULL_HANDLE,
+      .image = image_h
+   };
+
+   const VkImportMemoryFdInfo import_info = {
+      .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO,
+      .pNext = &ded_alloc,
+      .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
+      .fd = dup(dma_buf),
+   };
+
+   /* Find the first VRAM memory type, or GART for PRIME images. */
+   int memory_type_index = -1;
+   for (int i = 0;
+        i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
+      bool is_local =
+         !!(device->physical_device->memory_properties.memoryTypes[i]
+               .propertyFlags &
+            VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+      if (is_local) {
+         memory_type_index = i;
+         break;
+      }
+   }
+
+   /* fallback */
+   if (memory_type_index == -1)
+      memory_type_index = 0;
+
+   result =
+      tu_AllocateMemory(device_h,
+                        &(VkMemoryAllocateInfo) {
+                           .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+                           .pNext = &import_info,
+                           .allocationSize = image->size,
+                           .memoryTypeIndex = memory_type_index,
+                        },
+                        alloc, &memory_h);
+   if (result != VK_SUCCESS)
+      goto fail_create_image;
+
+   tu_BindImageMemory(device_h, image_h, memory_h, 0);
+
+   image->owned_memory = memory_h;
+   /* Don't clobber the out-parameter until success is certain. */
+   *out_image_h = image_h;
+
+   return VK_SUCCESS;
+
+fail_create_image:
+fail_size:
+   tu_DestroyImage(device_h, image_h, alloc);
+
+   return result;
+}
+
+VkResult
+tu_GetSwapchainGrallocUsageANDROID(VkDevice device_h,
+                                   VkFormat format,
+                                   VkImageUsageFlags imageUsage,
+                                   int *grallocUsage)
+{
+   TU_FROM_HANDLE(tu_device, device, device_h);
+   struct tu_physical_device *phys_dev = device->physical_device;
+   VkPhysicalDevice phys_dev_h = tu_physical_device_to_handle(phys_dev);
+   VkResult result;
+
+   *grallocUsage = 0;
+
+   /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags
+    * returned to applications via
+    * VkSurfaceCapabilitiesKHR::supportedUsageFlags.
+    * The relevant code in libvulkan/swapchain.cpp contains this fun comment:
+    *
+    *    TODO(jessehall): I think these are right, but haven't thought hard
+    *    about it. Do we need to query the driver for support of any of
+    *    these?
+    *
+    * Any disagreement between this function and the hardcoded
+    * VkSurfaceCapabilitiesKHR:supportedUsageFlags causes tests
+    * dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
+    */
+
+   const VkPhysicalDeviceImageFormatInfo2 image_format_info = {
+      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
+      .format = format,
+      .type = VK_IMAGE_TYPE_2D,
+      .tiling = VK_IMAGE_TILING_OPTIMAL,
+      .usage = imageUsage,
+   };
+
+   VkImageFormatProperties2 image_format_props = {
+      .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
+   };
+
+   /* Check that requested format and usage are supported. */
+   result = tu_GetPhysicalDeviceImageFormatProperties2(
+      phys_dev_h, &image_format_info, &image_format_props);
+   if (result != VK_SUCCESS) {
+      return vk_errorf(device->instance, result,
+                       "tu_GetPhysicalDeviceImageFormatProperties2 failed "
+                       "inside %s",
+                       __func__);
+   }
+
+   if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT |
+                             VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
+      *grallocUsage |= GRALLOC_USAGE_HW_RENDER;
+
+   if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+                             VK_IMAGE_USAGE_SAMPLED_BIT |
+                             VK_IMAGE_USAGE_STORAGE_BIT |
+                             VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
+      *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE;
+
+   /* All VkImageUsageFlags not explicitly checked here are unsupported for
+    * gralloc swapchains.
+    */
+   if (imageUsage != 0) {
+      return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
+                       "unsupported VkImageUsageFlags(0x%x) for gralloc "
+                       "swapchain",
+                       imageUsage);
+   }
+
+   /*
+    * FINISHME: Advertise all display-supported formats. Mostly
+    * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check
+    * what we need for 30-bit colors.
+    */
+   if (format == VK_FORMAT_B8G8R8A8_UNORM ||
+       format == VK_FORMAT_B5G6R5_UNORM_PACK16) {
+      *grallocUsage |= GRALLOC_USAGE_HW_FB | GRALLOC_USAGE_HW_COMPOSER |
+                       GRALLOC_USAGE_EXTERNAL_DISP;
+   }
+
+   if (*grallocUsage == 0)
+      return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+   return VK_SUCCESS;
+}
+
+VkResult
+tu_AcquireImageANDROID(VkDevice device,
+                       VkImage image_h,
+                       int nativeFenceFd,
+                       VkSemaphore semaphore,
+                       VkFence fence)
+{
+   VkResult semaphore_result = VK_SUCCESS, fence_result = VK_SUCCESS;
+
+   if (semaphore != VK_NULL_HANDLE) {
+      int semaphore_fd =
+         nativeFenceFd >= 0 ? dup(nativeFenceFd) : nativeFenceFd;
+      semaphore_result = tu_ImportSemaphoreFdKHR(
+         device, &(VkImportSemaphoreFdInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
+            .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
+            .fd = semaphore_fd,
+            .semaphore = semaphore,
+         });
+   }
+
+   if (fence != VK_NULL_HANDLE) {
+      int fence_fd = nativeFenceFd >= 0 ? dup(nativeFenceFd) : nativeFenceFd;
+      fence_result = tu_ImportFenceFdKHR(
+         device, &(VkImportFenceFdInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR,
+            .flags = VK_FENCE_IMPORT_TEMPORARY_BIT,
+            .fd = fence_fd,
+            .fence = fence,
+         });
+   }
+
+   close(nativeFenceFd);
+
+   if (semaphore_result != VK_SUCCESS)
+      return semaphore_result;
+   return fence_result;
+}
+
+VkResult
+tu_QueueSignalReleaseImageANDROID(VkQueue _queue,
+                                  uint32_t waitSemaphoreCount,
+                                  const VkSemaphore *pWaitSemaphores,
+                                  VkImage image,
+                                  int *pNativeFenceFd)
+{
+   TU_FROM_HANDLE(tu_queue, queue, _queue);
+   VkResult result = VK_SUCCESS;
+
+   if (waitSemaphoreCount == 0) {
+      if (pNativeFenceFd)
+         *pNativeFenceFd = -1;
+      return VK_SUCCESS;
+   }
+
+   int fd = -1;
+
+   for (uint32_t i = 0; i < waitSemaphoreCount; ++i) {
+      int tmp_fd;
+      result = tu_GetSemaphoreFdKHR(
+         tu_device_to_handle(queue->device),
+         &(VkSemaphoreGetFdInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
+            .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
+            .semaphore = pWaitSemaphores[i],
+         },
+         &tmp_fd);
+      if (result != VK_SUCCESS) {
+         if (fd >= 0)
+            close(fd);
+         return result;
+      }
+
+      if (fd < 0)
+         fd = tmp_fd;
+      else if (tmp_fd >= 0) {
+         sync_accumulate("tu", &fd, tmp_fd);
+         close(tmp_fd);
+      }
+   }
+
+   if (pNativeFenceFd) {
+      *pNativeFenceFd = fd;
+   } else if (fd >= 0) {
+      close(fd);
+      /* We still need to do the exports, to reset the semaphores, but
+       * otherwise we don't wait on them. */
+   }
+   return VK_SUCCESS;
+}
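(Note, not part of the import: the fd handling in tu_AcquireImageANDROID() above is easy to misread. Each import consumes its own dup() of the incoming sync fd, and the caller's fd is closed exactly once. An illustrative trace with hypothetical values:)

   /* nativeFenceFd = 5, semaphore and fence both non-NULL:
    *
    *   semaphore import: receives dup(5), owns that copy afterwards
    *   fence import:     receives dup(5), owns that copy afterwards
    *   close(5):         releases the caller's original fd
    *
    * nativeFenceFd = -1: both imports receive -1, which the sync-fd
    * handle-type convention treats as an already-signaled payload, and
    * the final close(-1) fails harmlessly.
    */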
diff --git a/lib/mesa/src/freedreno/vulkan/tu_cmd_buffer.c b/lib/mesa/src/freedreno/vulkan/tu_cmd_buffer.c
new file mode 100644
index 000000000..fe436e595
--- /dev/null
+++ b/lib/mesa/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -0,0 +1,2637 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "tu_private.h"
+
+#include "registers/adreno_pm4.xml.h"
+#include "registers/adreno_common.xml.h"
+#include "registers/a6xx.xml.h"
+
+#include "vk_format.h"
+
+#include "tu_cs.h"
+
+void
+tu_bo_list_init(struct tu_bo_list *list)
+{
+   list->count = list->capacity = 0;
+   list->bo_infos = NULL;
+}
+
+void
+tu_bo_list_destroy(struct tu_bo_list *list)
+{
+   free(list->bo_infos);
+}
+
+void
+tu_bo_list_reset(struct tu_bo_list *list)
+{
+   list->count = 0;
+}
+
+/**
+ * \a flags consists of MSM_SUBMIT_BO_FLAGS.
+ */
+static uint32_t
+tu_bo_list_add_info(struct tu_bo_list *list,
+                    const struct drm_msm_gem_submit_bo *bo_info)
+{
+   for (uint32_t i = 0; i < list->count; ++i) {
+      if (list->bo_infos[i].handle == bo_info->handle) {
+         assert(list->bo_infos[i].presumed == bo_info->presumed);
+         list->bo_infos[i].flags |= bo_info->flags;
+         return i;
+      }
+   }
+
+   /* grow list->bo_infos if needed */
+   if (list->count == list->capacity) {
+      uint32_t new_capacity = MAX2(2 * list->count, 16);
+      struct drm_msm_gem_submit_bo *new_bo_infos = realloc(
+         list->bo_infos, new_capacity * sizeof(struct drm_msm_gem_submit_bo));
+      if (!new_bo_infos)
+         return TU_BO_LIST_FAILED;
+      list->bo_infos = new_bo_infos;
+      list->capacity = new_capacity;
+   }
+
+   list->bo_infos[list->count] = *bo_info;
+   return list->count++;
+}
+
+uint32_t
+tu_bo_list_add(struct tu_bo_list *list,
+               const struct tu_bo *bo,
+               uint32_t flags)
+{
+   return tu_bo_list_add_info(list, &(struct drm_msm_gem_submit_bo) {
+                                       .flags = flags,
+                                       .handle = bo->gem_handle,
+                                       .presumed = bo->iova,
+                                    });
+}
+
+VkResult
+tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other)
+{
+   for (uint32_t i = 0; i < other->count; i++) {
+      if (tu_bo_list_add_info(list, other->bo_infos + i) == TU_BO_LIST_FAILED)
+         return VK_ERROR_OUT_OF_HOST_MEMORY;
+   }
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+tu_tiling_config_update_gmem_layout(struct tu_tiling_config *tiling,
+                                    const struct tu_device *dev)
+{
+   const uint32_t gmem_size = dev->physical_device->gmem_size;
+   uint32_t offset = 0;
+
+   for (uint32_t i = 0; i < tiling->buffer_count; i++) {
+      /* 16KB-aligned */
+      offset = align(offset, 0x4000);
+
+      tiling->gmem_offsets[i] = offset;
+      offset += tiling->tile0.extent.width * tiling->tile0.extent.height *
+                tiling->buffer_cpp[i];
+   }
+
+   return offset <= gmem_size ? VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
+}
+
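(Worked example, not part of the import; the numbers are hypothetical, and the real gmem_size and buffer_cpp values come from the physical device and the render pass:)

   /* tile0 = 256x128, two color buffers at 4 bytes per pixel:
    *
    *   buffer 0: offset = align(0x00000, 0x4000) = 0x00000
    *             ends at 0x00000 + 256*128*4     = 0x20000
    *   buffer 1: offset = align(0x20000, 0x4000) = 0x20000
    *             ends at 0x20000 + 256*128*4     = 0x40000
    *
    * The final offset, 0x40000 (256 KB), must be <= gmem_size;
    * otherwise the caller shrinks tile0 and retries (see the gmem
    * loop in tu_tiling_config_update_tile_layout below).
    */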
+static void
+tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
+                                    const struct tu_device *dev)
+{
+   const uint32_t tile_align_w = dev->physical_device->tile_align_w;
+   const uint32_t tile_align_h = dev->physical_device->tile_align_h;
+   const uint32_t max_tile_width = 1024; /* A6xx */
+
+   tiling->tile0.offset = (VkOffset2D) {
+      .x = tiling->render_area.offset.x & ~(tile_align_w - 1),
+      .y = tiling->render_area.offset.y & ~(tile_align_h - 1),
+   };
+
+   const uint32_t ra_width =
+      tiling->render_area.extent.width +
+      (tiling->render_area.offset.x - tiling->tile0.offset.x);
+   const uint32_t ra_height =
+      tiling->render_area.extent.height +
+      (tiling->render_area.offset.y - tiling->tile0.offset.y);
+
+   /* start from 1 tile */
+   tiling->tile_count = (VkExtent2D) {
+      .width = 1,
+      .height = 1,
+   };
+   tiling->tile0.extent = (VkExtent2D) {
+      .width = align(ra_width, tile_align_w),
+      .height = align(ra_height, tile_align_h),
+   };
+
+   /* do not exceed max tile width */
+   while (tiling->tile0.extent.width > max_tile_width) {
+      tiling->tile_count.width++;
+      tiling->tile0.extent.width =
+         align(ra_width / tiling->tile_count.width, tile_align_w);
+   }
+
+   /* do not exceed gmem size */
+   while (tu_tiling_config_update_gmem_layout(tiling, dev) != VK_SUCCESS) {
+      if (tiling->tile0.extent.width > tiling->tile0.extent.height) {
+         tiling->tile_count.width++;
+         tiling->tile0.extent.width =
+            align(ra_width / tiling->tile_count.width, tile_align_w);
+      } else {
+         tiling->tile_count.height++;
+         tiling->tile0.extent.height =
+            align(ra_height / tiling->tile_count.height, tile_align_h);
+      }
+   }
+}
+
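(Worked example, not part of the import; tile_align_w = tile_align_h = 32 is an assumption for illustration, the real alignments come from the physical device:)

   /* render area 1920x1080 at offset (0, 0), max_tile_width = 1024:
    *
    *   start:      tile_count = 1x1, tile0 = 1920x1088 (aligned up)
    *   width loop: 1920 > 1024 -> tile_count.width = 2,
    *               tile0.width = align(1920 / 2, 32) = 960
    *   gmem loop:  while the per-tile working set still overflows GMEM,
    *               split the longer tile axis and recompute, e.g.
    *               960x1088 -> 960x544 -> 640x544 -> 480x544 -> ...
    */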
+static void
+tu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling,
+                                    const struct tu_device *dev)
+{
+   const uint32_t max_pipe_count = 32; /* A6xx */
+
+   /* start from 1 tile per pipe */
+   tiling->pipe0 = (VkExtent2D) {
+      .width = 1,
+      .height = 1,
+   };
+   tiling->pipe_count = tiling->tile_count;
+
+   /* do not exceed max pipe count vertically */
+   while (tiling->pipe_count.height > max_pipe_count) {
+      tiling->pipe0.height += 2;
+      tiling->pipe_count.height =
+         (tiling->tile_count.height + tiling->pipe0.height - 1) /
+         tiling->pipe0.height;
+   }
+
+   /* do not exceed max pipe count */
+   while (tiling->pipe_count.width * tiling->pipe_count.height >
+          max_pipe_count) {
+      tiling->pipe0.width += 1;
+      tiling->pipe_count.width =
+         (tiling->tile_count.width + tiling->pipe0.width - 1) /
+         tiling->pipe0.width;
+   }
+}
+
+static void
+tu_tiling_config_update_pipes(struct tu_tiling_config *tiling,
+                              const struct tu_device *dev)
+{
+   const uint32_t max_pipe_count = 32; /* A6xx */
+   const uint32_t used_pipe_count =
+      tiling->pipe_count.width * tiling->pipe_count.height;
+   const VkExtent2D last_pipe = {
+      .width = tiling->tile_count.width % tiling->pipe0.width,
+      .height = tiling->tile_count.height % tiling->pipe0.height,
+   };
+
+   assert(used_pipe_count <= max_pipe_count);
+   assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config));
+
+   for (uint32_t y = 0; y < tiling->pipe_count.height; y++) {
+      for (uint32_t x = 0; x < tiling->pipe_count.width; x++) {
+         const uint32_t pipe_x = tiling->pipe0.width * x;
+         const uint32_t pipe_y = tiling->pipe0.height * y;
+         const uint32_t pipe_w = (x == tiling->pipe_count.width - 1)
+                                    ? last_pipe.width
+                                    : tiling->pipe0.width;
+         const uint32_t pipe_h = (y == tiling->pipe_count.height - 1)
+                                    ? last_pipe.height
+                                    : tiling->pipe0.height;
+         const uint32_t n = tiling->pipe_count.width * y + x;
+
+         tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
+                                  A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
+                                  A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
+                                  A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
+         tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h);
+      }
+   }
+
+   memset(tiling->pipe_config + used_pipe_count, 0,
+          sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
+}
+
+static void
+tu_tiling_config_update(struct tu_tiling_config *tiling,
+                        const struct tu_device *dev,
+                        const uint32_t *buffer_cpp,
+                        uint32_t buffer_count,
+                        const VkRect2D *render_area)
+{
+   /* see if there is any real change */
+   const bool ra_changed =
+      render_area &&
+      memcmp(&tiling->render_area, render_area, sizeof(*render_area));
+   const bool buf_changed = tiling->buffer_count != buffer_count ||
+                            memcmp(tiling->buffer_cpp, buffer_cpp,
+                                   sizeof(*buffer_cpp) * buffer_count);
+   if (!ra_changed && !buf_changed)
+      return;
+
+   if (ra_changed)
+      tiling->render_area = *render_area;
+
+   if (buf_changed) {
+      memcpy(tiling->buffer_cpp, buffer_cpp,
+             sizeof(*buffer_cpp) * buffer_count);
+      tiling->buffer_count = buffer_count;
+   }
+
+   tu_tiling_config_update_tile_layout(tiling, dev);
+   tu_tiling_config_update_pipe_layout(tiling, dev);
+   tu_tiling_config_update_pipes(tiling, dev);
+}
+
+static void
+tu_tiling_config_get_tile(const struct tu_tiling_config *tiling,
+                          const struct tu_device *dev,
+                          uint32_t tx,
+                          uint32_t ty,
+                          struct tu_tile *tile)
+{
+   /* find the pipe and the slot for tile (tx, ty) */
+   const uint32_t px = tx / tiling->pipe0.width;
+   const uint32_t py = ty / tiling->pipe0.height;
+   const uint32_t sx = tx - tiling->pipe0.width * px;
+   const uint32_t sy = ty - tiling->pipe0.height * py;
+
+   assert(tx < tiling->tile_count.width && ty < tiling->tile_count.height);
+   assert(px < tiling->pipe_count.width && py < tiling->pipe_count.height);
+   assert(sx < tiling->pipe0.width && sy < tiling->pipe0.height);
+
+   /* convert to 1D indices */
+   tile->pipe = tiling->pipe_count.width * py + px;
+   tile->slot = tiling->pipe0.width * sy + sx;
+
+   /* get the blit area for the tile */
+   tile->begin = (VkOffset2D) {
+      .x = tiling->tile0.offset.x + tiling->tile0.extent.width * tx,
+      .y = tiling->tile0.offset.y + tiling->tile0.extent.height * ty,
+   };
+   tile->end.x =
+      (tx == tiling->tile_count.width - 1)
+         ? tiling->render_area.offset.x + tiling->render_area.extent.width
+         : tile->begin.x + tiling->tile0.extent.width;
+   tile->end.y =
+      (ty == tiling->tile_count.height - 1)
+         ? tiling->render_area.offset.y + tiling->render_area.extent.height
+         : tile->begin.y + tiling->tile0.extent.height;
+}
+
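(Worked example, not part of the import: the tile-to-pipe mapping in tu_tiling_config_get_tile() above is plain 2D division and remainder, shown here with a hypothetical configuration:)

   /* tile_count = 6x4 and pipe0 = 2x2, so pipe_count = 3x2.
    * For tile (tx, ty) = (5, 1):
    *
    *   px = 5 / 2 = 2,   py = 1 / 2 = 0   -> pipe = 3*0 + 2 = 2
    *   sx = 5 - 2*2 = 1, sy = 1 - 2*0 = 1 -> slot = 2*1 + 1 = 3
    *
    * i.e. the tile is the last slot of the third pipe in the first
    * pipe row.
    */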
+static enum a3xx_msaa_samples
+tu6_msaa_samples(uint32_t samples)
+{
+   switch (samples) {
+   case 1:
+      return MSAA_ONE;
+   case 2:
+      return MSAA_TWO;
+   case 4:
+      return MSAA_FOUR;
+   case 8:
+      return MSAA_EIGHT;
+   default:
+      assert(!"invalid sample count");
+      return MSAA_ONE;
+   }
+}
+
+static enum a4xx_index_size
+tu6_index_size(VkIndexType type)
+{
+   switch (type) {
+   case VK_INDEX_TYPE_UINT16:
+      return INDEX4_SIZE_16_BIT;
+   case VK_INDEX_TYPE_UINT32:
+      return INDEX4_SIZE_32_BIT;
+   default:
+      unreachable("invalid VkIndexType");
+      return INDEX4_SIZE_8_BIT;
+   }
+}
+
+static void
+tu6_emit_marker(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   tu_cs_emit_write_reg(cs, cmd->marker_reg, ++cmd->marker_seqno);
+}
+
+void
+tu6_emit_event_write(struct tu_cmd_buffer *cmd,
+                     struct tu_cs *cs,
+                     enum vgt_event_type event,
+                     bool need_seqno)
+{
+   tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, need_seqno ? 4 : 1);
+   tu_cs_emit(cs, CP_EVENT_WRITE_0_EVENT(event));
+   if (need_seqno) {
+      tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
+      tu_cs_emit(cs, ++cmd->scratch_seqno);
+   }
+}
+
+static void
+tu6_emit_cache_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   tu6_emit_event_write(cmd, cs, 0x31, false);
+}
+
+static void
+tu6_emit_lrz_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   tu6_emit_event_write(cmd, cs, LRZ_FLUSH, false);
+}
+
+static void
+tu6_emit_wfi(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   if (cmd->wait_for_idle) {
+      tu_cs_emit_wfi(cs);
+      cmd->wait_for_idle = false;
+   }
+}
+
+static void
+tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   const struct tu_subpass *subpass = cmd->state.subpass;
+
+   const uint32_t a = subpass->depth_stencil_attachment.attachment;
+   if (a == VK_ATTACHMENT_UNUSED) {
+      tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
+      tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
+      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
+      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
+      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
+      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
+      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */
+
+      tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
+      tu_cs_emit(cs,
+                 A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
+
+      tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5);
+      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
+      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
+      tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
+      tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
+      tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */
+
+      tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_INFO, 1);
+      tu_cs_emit(cs, 0x00000000); /* RB_STENCIL_INFO */
+
+      return;
+   }
+
+   /* enable zs? */
+}
+
+static void
+tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
+   const struct tu_subpass *subpass = cmd->state.subpass;
+   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+   unsigned char mrt_comp[MAX_RTS] = { 0 };
+   unsigned srgb_cntl = 0;
+
+   uint32_t gmem_index = 0;
+   for (uint32_t i = 0; i < subpass->color_count; ++i) {
+      uint32_t a = subpass->color_attachments[i].attachment;
+      if (a == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      const struct tu_image_view *iview = fb->attachments[a].attachment;
+      const struct tu_image_level *slice =
+         &iview->image->levels[iview->base_mip];
+      const enum a6xx_tile_mode tile_mode = TILE6_LINEAR;
+      uint32_t stride = 0;
+      uint32_t offset = 0;
+
+      mrt_comp[i] = 0xf;
+
+      if (vk_format_is_srgb(iview->vk_format))
+         srgb_cntl |= (1 << i);
+
+      const struct tu_native_format *format =
+         tu6_get_native_format(iview->vk_format);
+      assert(format && format->rb >= 0);
+
+      offset = slice->offset + slice->size * iview->base_layer;
+      stride = slice->pitch * vk_format_get_blocksize(iview->vk_format);
+
+      tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6);
+      tu_cs_emit(cs, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format->rb) |
+                        A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
+                        A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(format->swap));
+      tu_cs_emit(cs, A6XX_RB_MRT_PITCH(stride));
+      tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(slice->size));
+      tu_cs_emit_qw(cs, iview->image->bo->iova + iview->image->bo_offset +
+                           offset); /* BASE_LO/HI */
+      tu_cs_emit(
+         cs, tiling->gmem_offsets[gmem_index++]); /* RB_MRT[i].BASE_GMEM */
+
+      tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_MRT_REG(i), 1);
+      tu_cs_emit(cs, A6XX_SP_FS_MRT_REG_COLOR_FORMAT(format->rb));
+
+#if 0
+      /* when we support UBWC, these would be the system memory
+       * addr/pitch/etc:
+       */
+      tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 4);
+      tu_cs_emit(cs, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
+      tu_cs_emit(cs, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
+      tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_PITCH(0));
+      tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
+#endif
+   }
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_SRGB_CNTL, 1);
+   tu_cs_emit(cs, srgb_cntl);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_SRGB_CNTL, 1);
+   tu_cs_emit(cs, srgb_cntl);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_COMPONENTS, 1);
+   tu_cs_emit(cs, A6XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+                     A6XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+                     A6XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+                     A6XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+                     A6XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+                     A6XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+                     A6XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+                     A6XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_RENDER_COMPONENTS, 1);
+   tu_cs_emit(cs, A6XX_SP_FS_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+                     A6XX_SP_FS_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+                     A6XX_SP_FS_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+                     A6XX_SP_FS_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+                     A6XX_SP_FS_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+                     A6XX_SP_FS_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+                     A6XX_SP_FS_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+                     A6XX_SP_FS_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+}
+
+static void
+tu6_emit_msaa(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   const struct tu_subpass *subpass = cmd->state.subpass;
+   const enum a3xx_msaa_samples samples =
+      tu6_msaa_samples(subpass->max_sample_count);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
+   tu_cs_emit(cs, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
+   tu_cs_emit(cs,
+              A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
+                 ((samples == MSAA_ONE) ? A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE
+                                        : 0));
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
+   tu_cs_emit(cs, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
+   tu_cs_emit(cs,
+              A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
+                 ((samples == MSAA_ONE) ? A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE
+                                        : 0));
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
+   tu_cs_emit(cs, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
+   tu_cs_emit(cs,
+              A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
+                 ((samples == MSAA_ONE) ? A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE
+                                        : 0));
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_MSAA_CNTL, 1);
+   tu_cs_emit(cs, A6XX_RB_MSAA_CNTL_SAMPLES(samples));
+}
+
+static void
+tu6_emit_bin_size(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t flags)
+{
+   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+   const uint32_t bin_w = tiling->tile0.extent.width;
+   const uint32_t bin_h = tiling->tile0.extent.height;
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_BIN_CONTROL, 1);
+   tu_cs_emit(cs, A6XX_GRAS_BIN_CONTROL_BINW(bin_w) |
+                     A6XX_GRAS_BIN_CONTROL_BINH(bin_h) | flags);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BIN_CONTROL, 1);
+   tu_cs_emit(cs, A6XX_RB_BIN_CONTROL_BINW(bin_w) |
+                     A6XX_RB_BIN_CONTROL_BINH(bin_h) | flags);
+
+   /* no flag for RB_BIN_CONTROL2... */
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BIN_CONTROL2, 1);
+   tu_cs_emit(cs, A6XX_RB_BIN_CONTROL2_BINW(bin_w) |
+                     A6XX_RB_BIN_CONTROL2_BINH(bin_h));
+}
+
+static void
+tu6_emit_render_cntl(struct tu_cmd_buffer *cmd,
+                     struct tu_cs *cs,
+                     bool binning)
+{
+   uint32_t cntl = 0;
+   cntl |= A6XX_RB_RENDER_CNTL_UNK4;
+   if (binning)
+      cntl |= A6XX_RB_RENDER_CNTL_BINNING;
+
+   tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
+   tu_cs_emit(cs, 0x2);
+   tu_cs_emit(cs, REG_A6XX_RB_RENDER_CNTL);
+   tu_cs_emit(cs, cntl);
+}
+
+static void
+tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   const VkRect2D *render_area = &cmd->state.tiling_config.render_area;
+   const uint32_t x1 = render_area->offset.x;
+   const uint32_t y1 = render_area->offset.y;
+   const uint32_t x2 = x1 + render_area->extent.width - 1;
+   const uint32_t y2 = y1 + render_area->extent.height - 1;
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
+   tu_cs_emit(cs,
+              A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
+   tu_cs_emit(cs,
+              A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
+}
+
+static void
+tu6_emit_blit_info(struct tu_cmd_buffer *cmd,
+                   struct tu_cs *cs,
+                   const struct tu_image_view *iview,
+                   uint32_t gmem_offset,
+                   uint32_t blit_info)
+{
+   const struct tu_image_level *slice =
+      &iview->image->levels[iview->base_mip];
+   const uint32_t offset = slice->offset + slice->size * iview->base_layer;
+   const uint32_t stride =
+      slice->pitch * vk_format_get_blocksize(iview->vk_format);
+   const enum a6xx_tile_mode tile_mode = TILE6_LINEAR;
+   const enum a3xx_msaa_samples samples = tu6_msaa_samples(1);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
+   tu_cs_emit(cs, blit_info);
+
+   /* tile mode? */
+   const struct tu_native_format *format =
+      tu6_get_native_format(iview->vk_format);
+   assert(format && format->rb >= 0);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 5);
+   tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) |
+                     A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
+                     A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) |
+                     A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(format->swap));
+   tu_cs_emit_qw(cs,
+                 iview->image->bo->iova + iview->image->bo_offset + offset);
+   tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(stride));
+   tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(slice->size));
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
+   tu_cs_emit(cs, gmem_offset);
+}
+
+static void
+tu6_emit_blit_clear(struct tu_cmd_buffer *cmd,
+                    struct tu_cs *cs,
+                    const struct tu_image_view *iview,
+                    uint32_t gmem_offset,
+                    const VkClearValue *clear_value)
+{
+   const enum a6xx_tile_mode tile_mode = TILE6_LINEAR;
+   const enum a3xx_msaa_samples samples = tu6_msaa_samples(1);
+
+   const struct tu_native_format *format =
+      tu6_get_native_format(iview->vk_format);
+   assert(format && format->rb >= 0);
+   /* must be WZYX; other values are ignored */
+   const enum a3xx_color_swap swap = WZYX;
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
+   tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) |
+                     A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
+                     A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) |
+                     A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(swap));
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
+   tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
+   tu_cs_emit(cs, gmem_offset);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
+   tu_cs_emit(cs, 0);
+
+   /* pack clear_value into WZYX order */
+   uint32_t clear_vals[4] = { 0 };
+   tu_pack_clear_value(clear_value, iview->vk_format, clear_vals);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
+   tu_cs_emit(cs, clear_vals[0]);
+   tu_cs_emit(cs, clear_vals[1]);
+   tu_cs_emit(cs, clear_vals[2]);
+   tu_cs_emit(cs, clear_vals[3]);
+}
+
+static void
+tu6_emit_blit(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   tu6_emit_marker(cmd, cs);
+   tu6_emit_event_write(cmd, cs, BLIT, false);
+   tu6_emit_marker(cmd, cs);
+}
+
+static void
+tu6_emit_window_scissor(struct tu_cmd_buffer *cmd,
+                        struct tu_cs *cs,
+                        uint32_t x1,
+                        uint32_t y1,
+                        uint32_t x2,
+                        uint32_t y2)
+{
+   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+   tu_cs_emit(cs, A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
+                     A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
+   tu_cs_emit(cs, A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
+                     A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_RESOLVE_CNTL_1, 2);
+   tu_cs_emit(
+      cs, A6XX_GRAS_RESOLVE_CNTL_1_X(x1) | A6XX_GRAS_RESOLVE_CNTL_1_Y(y1));
+   tu_cs_emit(
+      cs, A6XX_GRAS_RESOLVE_CNTL_2_X(x2) | A6XX_GRAS_RESOLVE_CNTL_2_Y(y2));
+}
+
+static void
+tu6_emit_window_offset(struct tu_cmd_buffer *cmd,
+                       struct tu_cs *cs,
+                       uint32_t x1,
+                       uint32_t y1)
+{
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET, 1);
+   tu_cs_emit(cs, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1));
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET2, 1);
+   tu_cs_emit(cs,
+              A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_WINDOW_OFFSET, 1);
+   tu_cs_emit(cs, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1));
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
+   tu_cs_emit(
+      cs, A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
+}
+
+static void
+tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
+                     struct tu_cs *cs,
+                     const struct tu_tile *tile)
+{
+   tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
+   tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(0x7));
+
+   tu6_emit_marker(cmd, cs);
+   tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
+   tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10);
+   tu6_emit_marker(cmd, cs);
+
+   const uint32_t x1 = tile->begin.x;
+   const uint32_t y1 = tile->begin.y;
+   const uint32_t x2 = tile->end.x - 1;
+   const uint32_t y2 = tile->end.y - 1;
+   tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
+   tu6_emit_window_offset(cmd, cs, x1, y1);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_OVERRIDE, 1);
+   tu_cs_emit(cs, A6XX_VPC_SO_OVERRIDE_SO_DISABLE);
+
+   if (false) {
+      /* hw binning? */
+   } else {
+      tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
+      tu_cs_emit(cs, 0x1);
+
+      tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
+      tu_cs_emit(cs, 0x0);
+   }
+}
+
+static void
+tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
+   const struct tu_subpass *subpass = cmd->state.subpass;
+   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+   const struct tu_attachment_state *attachments = cmd->state.attachments;
+
+   tu6_emit_blit_scissor(cmd, cs);
+
+   uint32_t gmem_index = 0;
+   for (uint32_t i = 0; i < subpass->color_count; ++i) {
+      const uint32_t a = subpass->color_attachments[i].attachment;
+      if (a == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      const struct tu_image_view *iview = fb->attachments[a].attachment;
+      const struct tu_attachment_state *att = attachments + a;
+      if (att->pending_clear_aspects) {
+         assert(att->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+         tu6_emit_blit_clear(cmd, cs, iview,
+                             tiling->gmem_offsets[gmem_index++],
+                             &att->clear_value);
+      } else {
+         tu6_emit_blit_info(cmd, cs, iview,
+                            tiling->gmem_offsets[gmem_index++],
+                            A6XX_RB_BLIT_INFO_UNK0 | A6XX_RB_BLIT_INFO_GMEM);
+      }
+
+      tu6_emit_blit(cmd, cs);
+   }
+
+   /* load/clear zs? */
+}
+
+static void
+tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
+   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+
+   if (false) {
+      /* hw binning? */
+   }
+
+   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
+   tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
+                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
+                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
+   tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
+   tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
+
+   tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
+   tu_cs_emit(cs, 0x0);
+
+   tu6_emit_marker(cmd, cs);
+   tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
+   tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10);
+   tu6_emit_marker(cmd, cs);
+
+   tu6_emit_blit_scissor(cmd, cs);
+
+   uint32_t gmem_index = 0;
+   for (uint32_t i = 0; i < cmd->state.subpass->color_count; ++i) {
+      uint32_t a = cmd->state.subpass->color_attachments[i].attachment;
+      if (a == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      const struct tu_image_view *iview = fb->attachments[a].attachment;
+      tu6_emit_blit_info(cmd, cs, iview, tiling->gmem_offsets[gmem_index++],
+                         0);
+      tu6_emit_blit(cmd, cs);
+   }
+}
+
+static void
+tu6_emit_restart_index(struct tu_cs *cs, uint32_t restart_index)
+{
+   tu_cs_emit_pkt4(cs, REG_A6XX_PC_RESTART_INDEX, 1);
+   tu_cs_emit(cs, restart_index);
+}
+
+static void
+tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
+   if (result != VK_SUCCESS) {
+      cmd->record_result = result;
+      return;
+   }
+
+   tu6_emit_cache_flush(cmd, cs);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 0xfffff);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_CCU_CNTL, 0x7c400004);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E04, 0x00100000);
+   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE04, 0x8);
+   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE00, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE0F, 0x3f);
+   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B605, 0x44);
+   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B600, 0x100000);
+   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80);
+   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE01, 0);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9600, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8600, 0x880);
+   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE04, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE03, 0x00000410);
+   tu_cs_emit_write_reg(cs, REG_A6XX_SP_IBO_COUNT, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B182, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BB11, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000);
+   tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_CLIENT_PF, 4);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E01, 0x0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AB00, 0x5);
+   tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A009, 0x00000001);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8811, 0x00000010);
+   tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x1f);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_SRGB_CNTL, 0);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8101, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SAMPLE_CNTL, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8110, 0);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL0, 0x401);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL1, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_SAMPLE_CNTL, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8818, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8819, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881A, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881B, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881C, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881D, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881E, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_88F0, 0);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9101, 0xffff00);
+   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9107, 0);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9236, 1);
+   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9300, 0);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_SO_OVERRIDE,
+                        A6XX_VPC_SO_OVERRIDE_SO_DISABLE);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9801, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9980, 0);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9B06, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9B06, 0);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A81B, 0);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8099, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_809B, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A0, 2);
+   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80AF, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9211, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9602, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9981, 0x3);
+   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9E72, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9108, 0x3);
+   tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B304, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B309, 0x000000a2);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8804, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A4, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A5, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A6, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8805, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8806, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8878, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8879, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc);
+
+   tu6_emit_marker(cmd, cs);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_VFD_MODE_CNTL, 0x00000000);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A008, 0);
+
+   tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x0000001f);
+
+   /* we don't use this yet.. probably best to disable.. */
+   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
+   tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
+                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
+                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
+   tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
+   tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_BASE_LO(0), 3);
+   tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_BASE_LO_0 */
+   tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_BASE_HI_0 */
+   tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_SIZE_0 */
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_FLUSH_BASE_LO(0), 2);
+   tu_cs_emit(cs, 0x00000000); /* VPC_SO_FLUSH_BASE_LO_0 */
+   tu_cs_emit(cs, 0x00000000); /* VPC_SO_FLUSH_BASE_HI_0 */
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUF_CNTL, 1);
+   tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUF_CNTL */
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(0), 1);
+   tu_cs_emit(cs, 0x00000000); /* UNKNOWN_E2AB */
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_BASE_LO(1), 3);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(1), 6);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(2), 6);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(3), 3);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_CTRL_REG0, 1);
+   tu_cs_emit(cs, 0x00000000);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_CTRL_REG0, 1);
+   tu_cs_emit(cs, 0x00000000);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_CNTL, 1);
+   tu_cs_emit(cs, 0x00000000);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_LRZ_CNTL, 1);
+   tu_cs_emit(cs, 0x00000000);
+
+   tu_cs_sanity_check(cs);
+}
+
+static void
+tu6_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
+   if (result != VK_SUCCESS) {
+      cmd->record_result = result;
+      return;
+   }
+
+   tu6_emit_lrz_flush(cmd, cs);
+
+   /* lrz clear? */
+
+   tu6_emit_cache_flush(cmd, cs);
+
+   tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
+   tu_cs_emit(cs, 0x0);
+
+   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
+   tu6_emit_wfi(cmd, cs);
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_CCU_CNTL, 1);
+   tu_cs_emit(cs, 0x7c400004); /* RB_CCU_CNTL */
+
+   tu6_emit_zs(cmd, cs);
+   tu6_emit_mrt(cmd, cs);
+   tu6_emit_msaa(cmd, cs);
+
+   if (false) {
+      /* hw binning? */
+   } else {
+      tu6_emit_bin_size(cmd, cs, 0x6000000);
+      /* no draws */
+   }
+
+   tu6_emit_render_cntl(cmd, cs, false);
+
+   tu_cs_sanity_check(cs);
+}
+
+static void
+tu6_render_tile(struct tu_cmd_buffer *cmd,
+                struct tu_cs *cs,
+                const struct tu_tile *tile)
+{
+   const uint32_t render_tile_space = 64 + tu_cs_get_call_size(&cmd->draw_cs);
+   VkResult result = tu_cs_reserve_space(cmd->device, cs, render_tile_space);
+   if (result != VK_SUCCESS) {
+      cmd->record_result = result;
+      return;
+   }
+
+   tu6_emit_tile_select(cmd, cs, tile);
+   tu_cs_emit_ib(cs, &cmd->state.tile_load_ib);
+
+   tu_cs_emit_call(cs, &cmd->draw_cs);
+   cmd->wait_for_idle = true;
+
+   tu_cs_emit_ib(cs, &cmd->state.tile_store_ib);
+
+   tu_cs_sanity_check(cs);
+}
+
+static void
+tu6_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   VkResult result = tu_cs_reserve_space(cmd->device, cs, 16);
+   if (result != VK_SUCCESS) {
+      cmd->record_result = result;
+      return;
+   }
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_CNTL, 1);
+   tu_cs_emit(cs, A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_UNK3);
+
+   tu6_emit_lrz_flush(cmd, cs);
+
+   tu6_emit_event_write(cmd, cs, CACHE_FLUSH_TS, true);
+
+   tu_cs_sanity_check(cs);
+}
+
+static void
+tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
+{
+   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+
+   tu6_render_begin(cmd, &cmd->cs);
+
+   for (uint32_t y = 0; y < tiling->tile_count.height; y++) {
+      for (uint32_t x = 0; x < tiling->tile_count.width; x++) {
+         struct tu_tile tile;
+         tu_tiling_config_get_tile(tiling, cmd->device, x, y, &tile);
+         tu6_render_tile(cmd, &cmd->cs, &tile);
+      }
+   }
+
+   tu6_render_end(cmd, &cmd->cs);
+}
+
+static void
+tu_cmd_prepare_tile_load_ib(struct tu_cmd_buffer *cmd)
+{
+   const uint32_t tile_load_space = 16 + 32 * MAX_RTS;
+   const struct tu_subpass *subpass = cmd->state.subpass;
+   struct tu_attachment_state *attachments = cmd->state.attachments;
+   struct tu_cs sub_cs;
+
+   VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs,
+                                            tile_load_space, &sub_cs);
+   if (result != VK_SUCCESS) {
+      cmd->record_result = result;
+      return;
+   }
+
+   /* emit to tile-load sub_cs */
+   tu6_emit_tile_load(cmd, &sub_cs);
+
+   cmd->state.tile_load_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs);
+
+   for (uint32_t i = 0; i < subpass->color_count; ++i) {
+      const uint32_t a = subpass->color_attachments[i].attachment;
+      if (a != VK_ATTACHMENT_UNUSED)
+         attachments[a].pending_clear_aspects = 0;
+   }
+}
+
+static void
+tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd)
+{
+   const uint32_t tile_store_space = 32 + 32 * MAX_RTS;
+   struct tu_cs sub_cs;
+
+   VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs,
+                                            tile_store_space, &sub_cs);
+   if (result != VK_SUCCESS) {
+      cmd->record_result = result;
+      return;
+   }
+
+   /* emit to tile-store sub_cs */
+   tu6_emit_tile_store(cmd, &sub_cs);
+
+   cmd->state.tile_store_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs);
+}
+
+static void
+tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd,
+                            const VkRect2D *render_area)
+{
+   const struct tu_device *dev = cmd->device;
+   const struct tu_render_pass *pass = cmd->state.pass;
+   const struct tu_subpass *subpass = cmd->state.subpass;
+   struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+
+   uint32_t buffer_cpp[MAX_RTS + 2];
+   uint32_t buffer_count = 0;
+
+   for (uint32_t i = 0; i < subpass->color_count; ++i) {
+      const uint32_t a = subpass->color_attachments[i].attachment;
+      if (a == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      const struct tu_render_pass_attachment *att = &pass->attachments[a];
+      buffer_cpp[buffer_count++] =
+         vk_format_get_blocksize(att->format) * att->samples;
+   }
+
+   if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+      const uint32_t a = subpass->depth_stencil_attachment.attachment;
+      const struct tu_render_pass_attachment *att = &pass->attachments[a];
+
+      /* TODO */
+      assert(att->format != VK_FORMAT_D32_SFLOAT_S8_UINT);
+
+      buffer_cpp[buffer_count++] =
+         vk_format_get_blocksize(att->format) * att->samples;
+   }
+
+   tu_tiling_config_update(tiling, dev, buffer_cpp, buffer_count,
+                           render_area);
+}
+
+const struct tu_dynamic_state default_dynamic_state = {
+   .viewport =
+     {
+       .count = 0,
+     },
+   .scissor =
+     {
+       .count = 0,
+     },
+   .line_width = 1.0f,
+   .depth_bias =
+     {
+       .bias = 0.0f,
+       .clamp = 0.0f,
+       .slope = 0.0f,
+     },
+   .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
+   .depth_bounds =
+     {
+       .min = 0.0f,
+       .max = 1.0f,
+     },
+   .stencil_compare_mask =
+     {
+       .front = ~0u,
+       .back = ~0u,
+     },
+   .stencil_write_mask =
+     {
+       .front = ~0u,
+       .back = ~0u,
+     },
+   .stencil_reference =
+     {
+       .front = 0u,
+       .back = 0u,
+     },
+};
+
+static void UNUSED /* FINISHME */
+tu_bind_dynamic_state(struct tu_cmd_buffer *cmd_buffer,
+                      const struct tu_dynamic_state *src)
+{
+   struct tu_dynamic_state *dest = &cmd_buffer->state.dynamic;
+   uint32_t copy_mask = src->mask;
+   uint32_t dest_mask = 0;
+
+   tu_use_args(cmd_buffer); /* FINISHME */
+
+   /* Make sure to copy the number of viewports/scissors because they can
+    * only be specified at pipeline creation time.
+    */
+   dest->viewport.count = src->viewport.count;
+   dest->scissor.count = src->scissor.count;
+   dest->discard_rectangle.count = src->discard_rectangle.count;
+
+   if (copy_mask & TU_DYNAMIC_VIEWPORT) {
+      if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
+                 src->viewport.count * sizeof(VkViewport))) {
+         typed_memcpy(dest->viewport.viewports, src->viewport.viewports,
+                      src->viewport.count);
+         dest_mask |= TU_DYNAMIC_VIEWPORT;
+      }
+   }
+
+   if (copy_mask & TU_DYNAMIC_SCISSOR) {
+      if (memcmp(&dest->scissor.scissors, &src->scissor.scissors,
+                 src->scissor.count * sizeof(VkRect2D))) {
+         typed_memcpy(dest->scissor.scissors, src->scissor.scissors,
+                      src->scissor.count);
+         dest_mask |= TU_DYNAMIC_SCISSOR;
+      }
+   }
+
+   if (copy_mask & TU_DYNAMIC_LINE_WIDTH) {
+      if (dest->line_width != src->line_width) {
+         dest->line_width = src->line_width;
+         dest_mask |= TU_DYNAMIC_LINE_WIDTH;
+      }
+   }
+
+   if (copy_mask & TU_DYNAMIC_DEPTH_BIAS) {
+      if (memcmp(&dest->depth_bias, &src->depth_bias,
+                 sizeof(src->depth_bias))) {
+         dest->depth_bias = src->depth_bias;
+         dest_mask |= TU_DYNAMIC_DEPTH_BIAS;
+      }
+   }
+
+   if (copy_mask & TU_DYNAMIC_BLEND_CONSTANTS) {
+      if (memcmp(&dest->blend_constants, &src->blend_constants,
+                 sizeof(src->blend_constants))) {
+         typed_memcpy(dest->blend_constants, src->blend_constants, 4);
+         dest_mask |= TU_DYNAMIC_BLEND_CONSTANTS;
+      }
+   }
+
+   if (copy_mask & TU_DYNAMIC_DEPTH_BOUNDS) {
+      if (memcmp(&dest->depth_bounds, &src->depth_bounds,
+                 sizeof(src->depth_bounds))) {
+         dest->depth_bounds = src->depth_bounds;
+         dest_mask |= TU_DYNAMIC_DEPTH_BOUNDS;
+      }
+   }
+
+   if (copy_mask & TU_DYNAMIC_STENCIL_COMPARE_MASK) {
+      if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask,
+                 sizeof(src->stencil_compare_mask))) {
+         dest->stencil_compare_mask = src->stencil_compare_mask;
+         dest_mask |= TU_DYNAMIC_STENCIL_COMPARE_MASK;
+      }
+   }
+
sizeof(src->stencil_write_mask))) { + dest->stencil_write_mask = src->stencil_write_mask; + dest_mask |= TU_DYNAMIC_STENCIL_WRITE_MASK; + } + } + + if (copy_mask & TU_DYNAMIC_STENCIL_REFERENCE) { + if (memcmp(&dest->stencil_reference, &src->stencil_reference, + sizeof(src->stencil_reference))) { + dest->stencil_reference = src->stencil_reference; + dest_mask |= TU_DYNAMIC_STENCIL_REFERENCE; + } + } + + if (copy_mask & TU_DYNAMIC_DISCARD_RECTANGLE) { + if (memcmp(&dest->discard_rectangle.rectangles, + &src->discard_rectangle.rectangles, + src->discard_rectangle.count * sizeof(VkRect2D))) { + typed_memcpy(dest->discard_rectangle.rectangles, + src->discard_rectangle.rectangles, + src->discard_rectangle.count); + dest_mask |= TU_DYNAMIC_DISCARD_RECTANGLE; + } + } +} + +static VkResult +tu_create_cmd_buffer(struct tu_device *device, + struct tu_cmd_pool *pool, + VkCommandBufferLevel level, + VkCommandBuffer *pCommandBuffer) +{ + struct tu_cmd_buffer *cmd_buffer; + cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cmd_buffer == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + cmd_buffer->device = device; + cmd_buffer->pool = pool; + cmd_buffer->level = level; + + if (pool) { + list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); + cmd_buffer->queue_family_index = pool->queue_family_index; + + } else { + /* Init the pool_link so we can safely call list_del when we destroy + * the command buffer + */ + list_inithead(&cmd_buffer->pool_link); + cmd_buffer->queue_family_index = TU_QUEUE_GENERAL; + } + + tu_bo_list_init(&cmd_buffer->bo_list); + tu_cs_init(&cmd_buffer->cs, TU_CS_MODE_GROW, 4096); + tu_cs_init(&cmd_buffer->draw_cs, TU_CS_MODE_GROW, 4096); + tu_cs_init(&cmd_buffer->tile_cs, TU_CS_MODE_SUB_STREAM, 1024); + + *pCommandBuffer = tu_cmd_buffer_to_handle(cmd_buffer); + + list_inithead(&cmd_buffer->upload.list); + + cmd_buffer->marker_reg = REG_A6XX_CP_SCRATCH_REG( + cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY ? 
7 : 6); + + VkResult result = tu_bo_init_new(device, &cmd_buffer->scratch_bo, 0x1000); + if (result != VK_SUCCESS) + return result; + + return VK_SUCCESS; +} + +static void +tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer) +{ + tu_bo_finish(cmd_buffer->device, &cmd_buffer->scratch_bo); + + list_del(&cmd_buffer->pool_link); + + for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) + free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr); + + tu_cs_finish(cmd_buffer->device, &cmd_buffer->cs); + tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_cs); + tu_cs_finish(cmd_buffer->device, &cmd_buffer->tile_cs); + + tu_bo_list_destroy(&cmd_buffer->bo_list); + vk_free(&cmd_buffer->pool->alloc, cmd_buffer); +} + +static VkResult +tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer) +{ + cmd_buffer->wait_for_idle = true; + + cmd_buffer->record_result = VK_SUCCESS; + + tu_bo_list_reset(&cmd_buffer->bo_list); + tu_cs_reset(cmd_buffer->device, &cmd_buffer->cs); + tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_cs); + tu_cs_reset(cmd_buffer->device, &cmd_buffer->tile_cs); + + for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) { + cmd_buffer->descriptors[i].dirty = 0; + cmd_buffer->descriptors[i].valid = 0; + cmd_buffer->descriptors[i].push_dirty = false; + } + + cmd_buffer->status = TU_CMD_BUFFER_STATUS_INITIAL; + + return cmd_buffer->record_result; +} + +static VkResult +tu_cmd_state_setup_attachments(struct tu_cmd_buffer *cmd_buffer, + const VkRenderPassBeginInfo *info) +{ + struct tu_cmd_state *state = &cmd_buffer->state; + const struct tu_framebuffer *fb = state->framebuffer; + const struct tu_render_pass *pass = state->pass; + + for (uint32_t i = 0; i < fb->attachment_count; ++i) { + const struct tu_image_view *iview = fb->attachments[i].attachment; + tu_bo_list_add(&cmd_buffer->bo_list, iview->image->bo, + MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE); + } + + if (pass->attachment_count == 0) { + state->attachments = NULL; + return VK_SUCCESS; + } + + state->attachments = + vk_alloc(&cmd_buffer->pool->alloc, + pass->attachment_count * sizeof(state->attachments[0]), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (state->attachments == NULL) { + cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; + return cmd_buffer->record_result; + } + + for (uint32_t i = 0; i < pass->attachment_count; ++i) { + const struct tu_render_pass_attachment *att = &pass->attachments[i]; + VkImageAspectFlags att_aspects = vk_format_aspects(att->format); + VkImageAspectFlags clear_aspects = 0; + + if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) { + /* color attachment */ + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; + } + } else { + /* depthstencil attachment */ + if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && + att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; + if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) + clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + } + if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + } + } + + state->attachments[i].pending_clear_aspects = clear_aspects; + state->attachments[i].cleared_views = 0; + if (clear_aspects && info) { + assert(info->clearValueCount > i); + state->attachments[i].clear_value = info->pClearValues[i]; + } + + state->attachments[i].current_layout = att->initial_layout; + } + + 
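+ /* clears are only recorded as pending at this point; they are + * performed later, when the tile load IB is emitted */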
return VK_SUCCESS; +} + +VkResult +tu_AllocateCommandBuffers(VkDevice _device, + const VkCommandBufferAllocateInfo *pAllocateInfo, + VkCommandBuffer *pCommandBuffers) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_cmd_pool, pool, pAllocateInfo->commandPool); + + VkResult result = VK_SUCCESS; + uint32_t i; + + for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { + + if (!list_empty(&pool->free_cmd_buffers)) { + struct tu_cmd_buffer *cmd_buffer = list_first_entry( + &pool->free_cmd_buffers, struct tu_cmd_buffer, pool_link); + + list_del(&cmd_buffer->pool_link); + list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); + + result = tu_reset_cmd_buffer(cmd_buffer); + cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + cmd_buffer->level = pAllocateInfo->level; + + pCommandBuffers[i] = tu_cmd_buffer_to_handle(cmd_buffer); + } else { + result = tu_create_cmd_buffer(device, pool, pAllocateInfo->level, + &pCommandBuffers[i]); + } + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) { + tu_FreeCommandBuffers(_device, pAllocateInfo->commandPool, i, + pCommandBuffers); + + /* From the Vulkan 1.0.66 spec: + * + * "vkAllocateCommandBuffers can be used to create multiple + * command buffers. If the creation of any of those command + * buffers fails, the implementation must destroy all + * successfully created command buffer objects from this + * command, set all entries of the pCommandBuffers array to + * NULL and return the error." + */ + memset(pCommandBuffers, 0, + sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount); + } + + return result; +} + +void +tu_FreeCommandBuffers(VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer *pCommandBuffers) +{ + for (uint32_t i = 0; i < commandBufferCount; i++) { + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, pCommandBuffers[i]); + + if (cmd_buffer) { + if (cmd_buffer->pool) { + list_del(&cmd_buffer->pool_link); + list_addtail(&cmd_buffer->pool_link, + &cmd_buffer->pool->free_cmd_buffers); + } else + tu_cmd_buffer_destroy(cmd_buffer); + } + } +} + +VkResult +tu_ResetCommandBuffer(VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + return tu_reset_cmd_buffer(cmd_buffer); +} + +VkResult +tu_BeginCommandBuffer(VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo *pBeginInfo) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + VkResult result = VK_SUCCESS; + + if (cmd_buffer->status != TU_CMD_BUFFER_STATUS_INITIAL) { + /* If the command buffer has already been reset with + * vkResetCommandBuffer, no need to do it again. 
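+ * Otherwise perform the implicit reset here, since vkBeginCommandBuffer + * may be called on a previously recorded command buffer.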
+ */ + result = tu_reset_cmd_buffer(cmd_buffer); + if (result != VK_SUCCESS) + return result; + } + + memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state)); + cmd_buffer->usage_flags = pBeginInfo->flags; + + tu_cs_begin(&cmd_buffer->cs); + + cmd_buffer->marker_seqno = 0; + cmd_buffer->scratch_seqno = 0; + + /* setup initial configuration into command buffer */ + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + switch (cmd_buffer->queue_family_index) { + case TU_QUEUE_GENERAL: + tu6_init_hw(cmd_buffer, &cmd_buffer->cs); + break; + default: + break; + } + } + + cmd_buffer->status = TU_CMD_BUFFER_STATUS_RECORDING; + + return VK_SUCCESS; +} + +void +tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer *pBuffers, + const VkDeviceSize *pOffsets) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + assert(firstBinding + bindingCount <= MAX_VBS); + + for (uint32_t i = 0; i < bindingCount; i++) { + cmd->state.vb.buffers[firstBinding + i] = + tu_buffer_from_handle(pBuffers[i]); + cmd->state.vb.offsets[firstBinding + i] = pOffsets[i]; + } + + /* VB states depend on VkPipelineVertexInputStateCreateInfo */ + cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS; +} + +void +tu_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + TU_FROM_HANDLE(tu_buffer, buf, buffer); + + /* initialize/update the restart index */ + if (!cmd->state.index_buffer || cmd->state.index_type != indexType) { + struct tu_cs *draw_cs = &cmd->draw_cs; + VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 2); + if (result != VK_SUCCESS) { + cmd->record_result = result; + return; + } + + tu6_emit_restart_index( + draw_cs, indexType == VK_INDEX_TYPE_UINT32 ? 
0xffffffff : 0xffff); + + tu_cs_sanity_check(draw_cs); + } + + /* track the BO */ + if (cmd->state.index_buffer != buf) + tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ); + + cmd->state.index_buffer = buf; + cmd->state.index_offset = offset; + cmd->state.index_type = indexType; +} + +void +tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + uint32_t firstSet, + uint32_t descriptorSetCount, + const VkDescriptorSet *pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t *pDynamicOffsets) +{ +} + +void +tu_CmdPushConstants(VkCommandBuffer commandBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t offset, + uint32_t size, + const void *pValues) +{ +} + +VkResult +tu_EndCommandBuffer(VkCommandBuffer commandBuffer) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + + if (cmd_buffer->scratch_seqno) { + tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->scratch_bo, + MSM_SUBMIT_BO_WRITE); + } + + for (uint32_t i = 0; i < cmd_buffer->draw_cs.bo_count; i++) { + tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_cs.bos[i], + MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP); + } + + for (uint32_t i = 0; i < cmd_buffer->tile_cs.bo_count; i++) { + tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->tile_cs.bos[i], + MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP); + } + + tu_cs_end(&cmd_buffer->cs); + + assert(!cmd_buffer->state.attachments); + + cmd_buffer->status = TU_CMD_BUFFER_STATUS_EXECUTABLE; + + return cmd_buffer->record_result; +} + +void +tu_CmdBindPipeline(VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline); + + switch (pipelineBindPoint) { + case VK_PIPELINE_BIND_POINT_GRAPHICS: + cmd->state.pipeline = pipeline; + cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE; + break; + case VK_PIPELINE_BIND_POINT_COMPUTE: + tu_finishme("binding compute pipeline"); + break; + default: + unreachable("unrecognized pipeline bind point"); + break; + } +} + +void +tu_CmdSetViewport(VkCommandBuffer commandBuffer, + uint32_t firstViewport, + uint32_t viewportCount, + const VkViewport *pViewports) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + struct tu_cs *draw_cs = &cmd->draw_cs; + + VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 12); + if (result != VK_SUCCESS) { + cmd->record_result = result; + return; + } + + assert(firstViewport == 0 && viewportCount == 1); + tu6_emit_viewport(draw_cs, pViewports); + + tu_cs_sanity_check(draw_cs); +} + +void +tu_CmdSetScissor(VkCommandBuffer commandBuffer, + uint32_t firstScissor, + uint32_t scissorCount, + const VkRect2D *pScissors) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + struct tu_cs *draw_cs = &cmd->draw_cs; + + VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 3); + if (result != VK_SUCCESS) { + cmd->record_result = result; + return; + } + + assert(firstScissor == 0 && scissorCount == 1); + tu6_emit_scissor(draw_cs, pScissors); + + tu_cs_sanity_check(draw_cs); +} + +void +tu_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + cmd->state.dynamic.line_width = lineWidth; + + /* line width depends on VkPipelineRasterizationStateCreateInfo */ + cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH; +} + +void +tu_CmdSetDepthBias(VkCommandBuffer commandBuffer, + float depthBiasConstantFactor, + 
float depthBiasClamp, + float depthBiasSlopeFactor) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + struct tu_cs *draw_cs = &cmd->draw_cs; + + VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 4); + if (result != VK_SUCCESS) { + cmd->record_result = result; + return; + } + + tu6_emit_depth_bias(draw_cs, depthBiasConstantFactor, depthBiasClamp, + depthBiasSlopeFactor); + + tu_cs_sanity_check(draw_cs); +} + +void +tu_CmdSetBlendConstants(VkCommandBuffer commandBuffer, + const float blendConstants[4]) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + struct tu_cs *draw_cs = &cmd->draw_cs; + + VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 5); + if (result != VK_SUCCESS) { + cmd->record_result = result; + return; + } + + tu6_emit_blend_constants(draw_cs, blendConstants); + + tu_cs_sanity_check(draw_cs); +} + +void +tu_CmdSetDepthBounds(VkCommandBuffer commandBuffer, + float minDepthBounds, + float maxDepthBounds) +{ +} + +void +tu_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t compareMask) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd->state.dynamic.stencil_compare_mask.front = compareMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd->state.dynamic.stencil_compare_mask.back = compareMask; + + /* the front/back compare masks must be updated together */ + cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; +} + +void +tu_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t writeMask) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd->state.dynamic.stencil_write_mask.front = writeMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd->state.dynamic.stencil_write_mask.back = writeMask; + + /* the front/back write masks must be updated together */ + cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; +} + +void +tu_CmdSetStencilReference(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t reference) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd->state.dynamic.stencil_reference.front = reference; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd->state.dynamic.stencil_reference.back = reference; + + /* the front/back references must be updated together */ + cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; +} + +void +tu_CmdExecuteCommands(VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer *pCmdBuffers) +{ +} + +VkResult +tu_CreateCommandPool(VkDevice _device, + const VkCommandPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkCommandPool *pCmdPool) +{ + TU_FROM_HANDLE(tu_device, device, _device); + struct tu_cmd_pool *pool; + + pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pool == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + if (pAllocator) + pool->alloc = *pAllocator; + else + pool->alloc = device->alloc; + + list_inithead(&pool->cmd_buffers); + list_inithead(&pool->free_cmd_buffers); + + pool->queue_family_index = pCreateInfo->queueFamilyIndex; + + *pCmdPool = tu_cmd_pool_to_handle(pool); + + return VK_SUCCESS; +} + +void +tu_DestroyCommandPool(VkDevice _device, + VkCommandPool commandPool, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + 
TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool); + + if (!pool) + return; + + list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer, + &pool->cmd_buffers, pool_link) + { + tu_cmd_buffer_destroy(cmd_buffer); + } + + list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer, + &pool->free_cmd_buffers, pool_link) + { + tu_cmd_buffer_destroy(cmd_buffer); + } + + vk_free2(&device->alloc, pAllocator, pool); +} + +VkResult +tu_ResetCommandPool(VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags) +{ + TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool); + VkResult result; + + list_for_each_entry(struct tu_cmd_buffer, cmd_buffer, &pool->cmd_buffers, + pool_link) + { + result = tu_reset_cmd_buffer(cmd_buffer); + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} + +void +tu_TrimCommandPool(VkDevice device, + VkCommandPool commandPool, + VkCommandPoolTrimFlags flags) +{ + TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool); + + if (!pool) + return; + + list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer, + &pool->free_cmd_buffers, pool_link) + { + tu_cmd_buffer_destroy(cmd_buffer); + } +} + +void +tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo *pRenderPassBegin, + VkSubpassContents contents) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + TU_FROM_HANDLE(tu_render_pass, pass, pRenderPassBegin->renderPass); + TU_FROM_HANDLE(tu_framebuffer, framebuffer, pRenderPassBegin->framebuffer); + VkResult result; + + cmd_buffer->state.pass = pass; + cmd_buffer->state.subpass = pass->subpasses; + cmd_buffer->state.framebuffer = framebuffer; + + result = tu_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); + if (result != VK_SUCCESS) + return; + + tu_cmd_update_tiling_config(cmd_buffer, &pRenderPassBegin->renderArea); + tu_cmd_prepare_tile_load_ib(cmd_buffer); + tu_cmd_prepare_tile_store_ib(cmd_buffer); + + /* draw_cs should contain entries only for this render pass */ + assert(!cmd_buffer->draw_cs.entry_count); + tu_cs_begin(&cmd_buffer->draw_cs); +} + +void +tu_CmdBeginRenderPass2KHR(VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo *pRenderPassBeginInfo, + const VkSubpassBeginInfoKHR *pSubpassBeginInfo) +{ + tu_CmdBeginRenderPass(commandBuffer, pRenderPassBeginInfo, + pSubpassBeginInfo->contents); +} + +void +tu_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + tu_cmd_render_tiles(cmd); + + cmd->state.subpass++; + + tu_cmd_update_tiling_config(cmd, NULL); + tu_cmd_prepare_tile_load_ib(cmd); + tu_cmd_prepare_tile_store_ib(cmd); +} + +void +tu_CmdNextSubpass2KHR(VkCommandBuffer commandBuffer, + const VkSubpassBeginInfoKHR *pSubpassBeginInfo, + const VkSubpassEndInfoKHR *pSubpassEndInfo) +{ + tu_CmdNextSubpass(commandBuffer, pSubpassBeginInfo->contents); +} + +struct tu_draw_info +{ + /** + * Number of vertices. + */ + uint32_t count; + + /** + * Index of the first vertex. + */ + int32_t vertex_offset; + + /** + * First instance id. + */ + uint32_t first_instance; + + /** + * Number of instances. + */ + uint32_t instance_count; + + /** + * First index (indexed draws only). + */ + uint32_t first_index; + + /** + * Whether it's an indexed draw. + */ + bool indexed; + + /** + * Indirect draw parameters resource. + */ + struct tu_buffer *indirect; + uint64_t indirect_offset; + uint32_t stride; + + /** + * Draw count parameters resource. 
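+ * Used only by draws that take a separate count buffer; NULL for all + * other draw paths.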
+ */ + struct tu_buffer *count_buffer; + uint64_t count_buffer_offset; +}; + +enum tu_draw_state_group_id +{ + TU_DRAW_STATE_PROGRAM, + TU_DRAW_STATE_PROGRAM_BINNING, + TU_DRAW_STATE_VI, + TU_DRAW_STATE_VI_BINNING, + TU_DRAW_STATE_VP, + TU_DRAW_STATE_RAST, + TU_DRAW_STATE_DS, + TU_DRAW_STATE_BLEND, + + TU_DRAW_STATE_COUNT, +}; + +struct tu_draw_state_group +{ + enum tu_draw_state_group_id id; + uint32_t enable_mask; + const struct tu_cs_entry *ib; +}; + +static void +tu6_bind_draw_states(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + const struct tu_draw_info *draw) +{ + const struct tu_pipeline *pipeline = cmd->state.pipeline; + const struct tu_dynamic_state *dynamic = &cmd->state.dynamic; + struct tu_draw_state_group draw_state_groups[TU_DRAW_STATE_COUNT]; + uint32_t draw_state_group_count = 0; + + VkResult result = tu_cs_reserve_space(cmd->device, cs, 256); + if (result != VK_SUCCESS) { + cmd->record_result = result; + return; + } + + /* TODO lrz */ + + uint32_t pc_primitive_cntl = 0; + if (pipeline->ia.primitive_restart && draw->indexed) + pc_primitive_cntl |= A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART; + + tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0); + tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9990, 0); + tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A008, 0); + + tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_0, 1); + tu_cs_emit(cs, pc_primitive_cntl); + + if (cmd->state.dirty & + (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH) && + (pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH)) { + tu6_emit_gras_su_cntl(cs, pipeline->rast.gras_su_cntl, + dynamic->line_width); + } + + if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) && + (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) { + tu6_emit_stencil_compare_mask(cs, dynamic->stencil_compare_mask.front, + dynamic->stencil_compare_mask.back); + } + + if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) && + (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) { + tu6_emit_stencil_write_mask(cs, dynamic->stencil_write_mask.front, + dynamic->stencil_write_mask.back); + } + + if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) && + (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) { + tu6_emit_stencil_reference(cs, dynamic->stencil_reference.front, + dynamic->stencil_reference.back); + } + + if (cmd->state.dirty & + (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_VERTEX_BUFFERS)) { + for (uint32_t i = 0; i < pipeline->vi.count; i++) { + const uint32_t binding = pipeline->vi.bindings[i]; + const uint32_t stride = pipeline->vi.strides[i]; + const struct tu_buffer *buf = cmd->state.vb.buffers[binding]; + const VkDeviceSize offset = buf->bo_offset + + cmd->state.vb.offsets[binding] + + pipeline->vi.offsets[i]; + const VkDeviceSize size = + offset < buf->bo->size ? 
buf->bo->size - offset : 0; + + tu_cs_emit_pkt4(cs, REG_A6XX_VFD_FETCH(i), 4); + tu_cs_emit_qw(cs, buf->bo->iova + offset); + tu_cs_emit(cs, size); + tu_cs_emit(cs, stride); + } + } + + /* TODO shader consts */ + + if (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) { + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_PROGRAM, + .enable_mask = 0x6, + .ib = &pipeline->program.state_ib, + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_PROGRAM_BINNING, + .enable_mask = 0x1, + .ib = &pipeline->program.binning_state_ib, + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_VI, + .enable_mask = 0x6, + .ib = &pipeline->vi.state_ib, + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_VI_BINNING, + .enable_mask = 0x1, + .ib = &pipeline->vi.binning_state_ib, + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_VP, + .enable_mask = 0x7, + .ib = &pipeline->vp.state_ib, + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_RAST, + .enable_mask = 0x7, + .ib = &pipeline->rast.state_ib, + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_DS, + .enable_mask = 0x7, + .ib = &pipeline->ds.state_ib, + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_BLEND, + .enable_mask = 0x7, + .ib = &pipeline->blend.state_ib, + }; + } + + tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_group_count); + for (uint32_t i = 0; i < draw_state_group_count; i++) { + const struct tu_draw_state_group *group = &draw_state_groups[i]; + + uint32_t cp_set_draw_state = + CP_SET_DRAW_STATE__0_COUNT(group->ib->size / 4) | + CP_SET_DRAW_STATE__0_ENABLE_MASK(group->enable_mask) | + CP_SET_DRAW_STATE__0_GROUP_ID(group->id); + uint64_t iova; + if (group->ib->size) { + iova = group->ib->bo->iova + group->ib->offset; + } else { + cp_set_draw_state |= CP_SET_DRAW_STATE__0_DISABLE; + iova = 0; + } + + tu_cs_emit(cs, cp_set_draw_state); + tu_cs_emit_qw(cs, iova); + } + + tu_cs_sanity_check(cs); + + /* track BOs */ + if (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) { + tu_bo_list_add(&cmd->bo_list, &pipeline->program.binary_bo, + MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP); + for (uint32_t i = 0; i < pipeline->cs.bo_count; i++) { + tu_bo_list_add(&cmd->bo_list, pipeline->cs.bos[i], + MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP); + } + } + if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) { + for (uint32_t i = 0; i < MAX_VBS; i++) { + const struct tu_buffer *buf = cmd->state.vb.buffers[i]; + if (buf) + tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ); + } + } + + cmd->state.dirty = 0; +} + +static void +tu6_emit_draw_direct(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + const struct tu_draw_info *draw) +{ + + const enum pc_di_primtype primtype = cmd->state.pipeline->ia.primtype; + + tu_cs_emit_pkt4(cs, REG_A6XX_VFD_INDEX_OFFSET, 2); + tu_cs_emit(cs, draw->vertex_offset); + tu_cs_emit(cs, draw->first_instance); + + /* TODO hw binning */ + if (draw->indexed) { + const enum a4xx_index_size index_size = + tu6_index_size(cmd->state.index_type); + const uint32_t index_bytes = + (cmd->state.index_type == VK_INDEX_TYPE_UINT32) ? 
4 : 2; + const struct tu_buffer *buf = cmd->state.index_buffer; + const VkDeviceSize offset = buf->bo_offset + cmd->state.index_offset + + index_bytes * draw->first_index; + const uint32_t size = index_bytes * draw->count; + + const uint32_t cp_draw_indx = + CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) | + CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_DMA) | + CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(index_size) | + CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY) | 0x2000; + + tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 7); + tu_cs_emit(cs, cp_draw_indx); + tu_cs_emit(cs, draw->instance_count); + tu_cs_emit(cs, draw->count); + tu_cs_emit(cs, 0x0); /* XXX */ + tu_cs_emit_qw(cs, buf->bo->iova + offset); + tu_cs_emit(cs, size); + } else { + const uint32_t cp_draw_indx = + CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) | + CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) | + CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY) | 0x2000; + + tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3); + tu_cs_emit(cs, cp_draw_indx); + tu_cs_emit(cs, draw->instance_count); + tu_cs_emit(cs, draw->count); + } +} + +static void +tu_draw(struct tu_cmd_buffer *cmd, const struct tu_draw_info *draw) +{ + struct tu_cs *cs = &cmd->draw_cs; + + tu6_bind_draw_states(cmd, cs, draw); + + VkResult result = tu_cs_reserve_space(cmd->device, cs, 32); + if (result != VK_SUCCESS) { + cmd->record_result = result; + return; + } + + if (draw->indirect) { + tu_finishme("indirect draw"); + return; + } + + /* TODO tu6_emit_marker should pick different regs depending on cs */ + tu6_emit_marker(cmd, cs); + tu6_emit_draw_direct(cmd, cs, draw); + tu6_emit_marker(cmd, cs); + + cmd->wait_for_idle = true; + + tu_cs_sanity_check(cs); +} + +void +tu_CmdDraw(VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + struct tu_draw_info info = {}; + + info.count = vertexCount; + info.instance_count = instanceCount; + info.first_instance = firstInstance; + info.vertex_offset = firstVertex; + + tu_draw(cmd_buffer, &info); +} + +void +tu_CmdDrawIndexed(VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + struct tu_draw_info info = {}; + + info.indexed = true; + info.count = indexCount; + info.instance_count = instanceCount; + info.first_index = firstIndex; + info.vertex_offset = vertexOffset; + info.first_instance = firstInstance; + + tu_draw(cmd_buffer, &info); +} + +void +tu_CmdDrawIndirect(VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + TU_FROM_HANDLE(tu_buffer, buffer, _buffer); + struct tu_draw_info info = {}; + + info.count = drawCount; + info.indirect = buffer; + info.indirect_offset = offset; + info.stride = stride; + + tu_draw(cmd_buffer, &info); +} + +void +tu_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + TU_FROM_HANDLE(tu_buffer, buffer, _buffer); + struct tu_draw_info info = {}; + + info.indexed = true; + info.count = drawCount; + info.indirect = buffer; + info.indirect_offset = offset; + info.stride = stride; + + tu_draw(cmd_buffer, &info); +} + +struct 
tu_dispatch_info +{ + /** + * Determine the layout of the grid (in block units) to be used. + */ + uint32_t blocks[3]; + + /** + * A starting offset for the grid. If unaligned is set, the offset + * must still be aligned. + */ + uint32_t offsets[3]; + /** + * Whether it's an unaligned compute dispatch. + */ + bool unaligned; + + /** + * Indirect compute parameters resource. + */ + struct tu_buffer *indirect; + uint64_t indirect_offset; +}; + +static void +tu_dispatch(struct tu_cmd_buffer *cmd_buffer, + const struct tu_dispatch_info *info) +{ +} + +void +tu_CmdDispatchBase(VkCommandBuffer commandBuffer, + uint32_t base_x, + uint32_t base_y, + uint32_t base_z, + uint32_t x, + uint32_t y, + uint32_t z) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + struct tu_dispatch_info info = {}; + + info.blocks[0] = x; + info.blocks[1] = y; + info.blocks[2] = z; + + info.offsets[0] = base_x; + info.offsets[1] = base_y; + info.offsets[2] = base_z; + tu_dispatch(cmd_buffer, &info); +} + +void +tu_CmdDispatch(VkCommandBuffer commandBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ + tu_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z); +} + +void +tu_CmdDispatchIndirect(VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + TU_FROM_HANDLE(tu_buffer, buffer, _buffer); + struct tu_dispatch_info info = {}; + + info.indirect = buffer; + info.indirect_offset = offset; + + tu_dispatch(cmd_buffer, &info); +} + +void +tu_CmdEndRenderPass(VkCommandBuffer commandBuffer) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + + tu_cs_end(&cmd_buffer->draw_cs); + + tu_cmd_render_tiles(cmd_buffer); + + /* discard draw_cs entries now that the tiles are rendered */ + tu_cs_discard_entries(&cmd_buffer->draw_cs); + + vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); + cmd_buffer->state.attachments = NULL; + + cmd_buffer->state.pass = NULL; + cmd_buffer->state.subpass = NULL; + cmd_buffer->state.framebuffer = NULL; +} + +void +tu_CmdEndRenderPass2KHR(VkCommandBuffer commandBuffer, + const VkSubpassEndInfoKHR *pSubpassEndInfo) +{ + tu_CmdEndRenderPass(commandBuffer); +} + +struct tu_barrier_info +{ + uint32_t eventCount; + const VkEvent *pEvents; + VkPipelineStageFlags srcStageMask; +}; + +static void +tu_barrier(struct tu_cmd_buffer *cmd_buffer, + uint32_t memoryBarrierCount, + const VkMemoryBarrier *pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier *pImageMemoryBarriers, + const struct tu_barrier_info *info) +{ +} + +void +tu_CmdPipelineBarrier(VkCommandBuffer commandBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkBool32 byRegion, + uint32_t memoryBarrierCount, + const VkMemoryBarrier *pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier *pImageMemoryBarriers) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + struct tu_barrier_info info; + + info.eventCount = 0; + info.pEvents = NULL; + info.srcStageMask = srcStageMask; + + tu_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, + bufferMemoryBarrierCount, pBufferMemoryBarriers, + imageMemoryBarrierCount, pImageMemoryBarriers, &info); +} + +static void +write_event(struct tu_cmd_buffer *cmd_buffer, + struct tu_event *event, + VkPipelineStageFlags 
stageMask, + unsigned value) +{ +} + +void +tu_CmdSetEvent(VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + TU_FROM_HANDLE(tu_event, event, _event); + + write_event(cmd_buffer, event, stageMask, 1); +} + +void +tu_CmdResetEvent(VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + TU_FROM_HANDLE(tu_event, event, _event); + + write_event(cmd_buffer, event, stageMask, 0); +} + +void +tu_CmdWaitEvents(VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent *pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier *pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier *pImageMemoryBarriers) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + struct tu_barrier_info info; + + info.eventCount = eventCount; + info.pEvents = pEvents; + info.srcStageMask = 0; + + tu_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, + bufferMemoryBarrierCount, pBufferMemoryBarriers, + imageMemoryBarrierCount, pImageMemoryBarriers, &info); +} + +void +tu_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask) +{ + /* No-op */ +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_cs.c b/lib/mesa/src/freedreno/vulkan/tu_cs.c new file mode 100644 index 000000000..48242f813 --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_cs.c @@ -0,0 +1,368 @@ +/* + * Copyright © 2019 Google LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "tu_cs.h" + +/** + * Initialize a command stream. + */ +void +tu_cs_init(struct tu_cs *cs, enum tu_cs_mode mode, uint32_t initial_size) +{ + assert(mode != TU_CS_MODE_EXTERNAL); + + memset(cs, 0, sizeof(*cs)); + + cs->mode = mode; + cs->next_bo_size = initial_size; +} + +/** + * Initialize a command stream as a wrapper to an external buffer. + */ +void +tu_cs_init_external(struct tu_cs *cs, uint32_t *start, uint32_t *end) +{ + memset(cs, 0, sizeof(*cs)); + + cs->mode = TU_CS_MODE_EXTERNAL; + cs->start = cs->reserved_end = cs->cur = start; + cs->end = end; +} + +/** + * Finish and release all resources owned by a command stream. 
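+ * After this call the stream must be re-initialized with tu_cs_init + * before it can be used again.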
+ */ +void +tu_cs_finish(struct tu_device *dev, struct tu_cs *cs) +{ + for (uint32_t i = 0; i < cs->bo_count; ++i) { + tu_bo_finish(dev, cs->bos[i]); + free(cs->bos[i]); + } + + free(cs->entries); + free(cs->bos); +} + +/** + * Get the offset of the command packets emitted since the last call to + * tu_cs_add_entry. + */ +static uint32_t +tu_cs_get_offset(const struct tu_cs *cs) +{ + assert(cs->bo_count); + return cs->start - (uint32_t *) cs->bos[cs->bo_count - 1]->map; +} + +/** + * Get the size of the command packets emitted since the last call to + * tu_cs_add_entry. + */ +static uint32_t +tu_cs_get_size(const struct tu_cs *cs) +{ + return cs->cur - cs->start; +} + +/** + * Get the size of the remaining space in the current BO. + */ +static uint32_t +tu_cs_get_space(const struct tu_cs *cs) +{ + return cs->end - cs->cur; +} + +/** + * Return true if there is no command packet emitted since the last call to + * tu_cs_add_entry. + */ +static uint32_t +tu_cs_is_empty(const struct tu_cs *cs) +{ + return tu_cs_get_size(cs) == 0; +} + +/* + * Allocate and add a BO to a command stream. Following command packets will + * be emitted to the new BO. + */ +static VkResult +tu_cs_add_bo(struct tu_device *dev, struct tu_cs *cs, uint32_t size) +{ + /* no BO for TU_CS_MODE_EXTERNAL */ + assert(cs->mode != TU_CS_MODE_EXTERNAL); + + /* no dangling command packet */ + assert(tu_cs_is_empty(cs)); + + /* grow cs->bos if needed */ + if (cs->bo_count == cs->bo_capacity) { + uint32_t new_capacity = MAX2(4, 2 * cs->bo_capacity); + struct tu_bo **new_bos = + realloc(cs->bos, new_capacity * sizeof(struct tu_bo *)); + if (!new_bos) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + cs->bo_capacity = new_capacity; + cs->bos = new_bos; + } + + struct tu_bo *new_bo = malloc(sizeof(struct tu_bo)); + if (!new_bo) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + VkResult result = tu_bo_init_new(dev, new_bo, size * sizeof(uint32_t)); + if (result != VK_SUCCESS) { + free(new_bo); + return result; + } + + result = tu_bo_map(dev, new_bo); + if (result != VK_SUCCESS) { + tu_bo_finish(dev, new_bo); + free(new_bo); + return result; + } + + cs->bos[cs->bo_count++] = new_bo; + + cs->start = cs->cur = cs->reserved_end = (uint32_t *) new_bo->map; + cs->end = cs->start + new_bo->size / sizeof(uint32_t); + + return VK_SUCCESS; +} + +/** + * Reserve an IB entry. + */ +static VkResult +tu_cs_reserve_entry(struct tu_device *dev, struct tu_cs *cs) +{ + /* entries are only for TU_CS_MODE_GROW */ + assert(cs->mode == TU_CS_MODE_GROW); + + /* grow cs->entries if needed */ + if (cs->entry_count == cs->entry_capacity) { + uint32_t new_capacity = MAX2(4, cs->entry_capacity * 2); + struct tu_cs_entry *new_entries = + realloc(cs->entries, new_capacity * sizeof(struct tu_cs_entry)); + if (!new_entries) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + cs->entry_capacity = new_capacity; + cs->entries = new_entries; + } + + return VK_SUCCESS; +} + +/** + * Add an IB entry for the command packets emitted since the last call to this + * function. 
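+ * The entry records the BO, byte offset, and byte size of those packets; + * cs->start then advances so that the next packets begin a new entry.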
+ */ +static void +tu_cs_add_entry(struct tu_cs *cs) +{ + /* entries are only for TU_CS_MODE_GROW */ + assert(cs->mode == TU_CS_MODE_GROW); + + /* disallow empty entry */ + assert(!tu_cs_is_empty(cs)); + + /* + * because we disallow empty entry, tu_cs_add_bo and tu_cs_reserve_entry + * must both have been called + */ + assert(cs->bo_count); + assert(cs->entry_count < cs->entry_capacity); + + /* add an entry for [cs->start, cs->cur] */ + cs->entries[cs->entry_count++] = (struct tu_cs_entry) { + .bo = cs->bos[cs->bo_count - 1], + .size = tu_cs_get_size(cs) * sizeof(uint32_t), + .offset = tu_cs_get_offset(cs) * sizeof(uint32_t), + }; + + cs->start = cs->cur; +} + +/** + * Begin (or continue) command packet emission. This does nothing but sanity + * checks currently. \a cs must not be in TU_CS_MODE_SUB_STREAM mode. + */ +void +tu_cs_begin(struct tu_cs *cs) +{ + assert(cs->mode != TU_CS_MODE_SUB_STREAM); + assert(tu_cs_is_empty(cs)); +} + +/** + * End command packet emission. This adds an IB entry when \a cs is in + * TU_CS_MODE_GROW mode. + */ +void +tu_cs_end(struct tu_cs *cs) +{ + assert(cs->mode != TU_CS_MODE_SUB_STREAM); + + if (cs->mode == TU_CS_MODE_GROW && !tu_cs_is_empty(cs)) + tu_cs_add_entry(cs); +} + +/** + * Begin command packet emission to a sub-stream. \a cs must be in + * TU_CS_MODE_SUB_STREAM mode. + * + * Return \a sub_cs which is in TU_CS_MODE_EXTERNAL mode. tu_cs_begin and + * tu_cs_reserve_space are implied and \a sub_cs is ready for command packet + * emission. + */ +VkResult +tu_cs_begin_sub_stream(struct tu_device *dev, + struct tu_cs *cs, + uint32_t size, + struct tu_cs *sub_cs) +{ + assert(cs->mode == TU_CS_MODE_SUB_STREAM); + assert(size); + + VkResult result = tu_cs_reserve_space(dev, cs, size); + if (result != VK_SUCCESS) + return result; + + tu_cs_init_external(sub_cs, cs->cur, cs->reserved_end); + tu_cs_begin(sub_cs); + result = tu_cs_reserve_space(dev, sub_cs, size); + assert(result == VK_SUCCESS); + + return VK_SUCCESS; +} + +/** + * End command packet emission to a sub-stream. \a sub_cs becomes invalid + * after this call. + * + * Return an IB entry for the sub-stream. The entry has the same lifetime as + * \a cs. + */ +struct tu_cs_entry +tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs) +{ + assert(cs->mode == TU_CS_MODE_SUB_STREAM); + assert(cs->bo_count); + assert(sub_cs->start == cs->cur && sub_cs->end == cs->reserved_end); + tu_cs_sanity_check(sub_cs); + + tu_cs_end(sub_cs); + + cs->cur = sub_cs->cur; + + struct tu_cs_entry entry = { + .bo = cs->bos[cs->bo_count - 1], + .size = tu_cs_get_size(cs) * sizeof(uint32_t), + .offset = tu_cs_get_offset(cs) * sizeof(uint32_t), + }; + + cs->start = cs->cur; + + return entry; +} + +/** + * Reserve space from a command stream for \a reserved_size uint32_t values. + * This never fails when \a cs has mode TU_CS_MODE_EXTERNAL. 
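+ * + * A typical recording sequence looks like this (illustrative sketch only; + * the register and value are stand-ins): + * + *    VkResult result = tu_cs_reserve_space(dev, cs, 2); + *    if (result != VK_SUCCESS) + *       return result; + *    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_CNTL, 1); + *    tu_cs_emit(cs, A6XX_GRAS_LRZ_CNTL_ENABLE); + *    tu_cs_sanity_check(cs);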
+ */ +VkResult +tu_cs_reserve_space(struct tu_device *dev, + struct tu_cs *cs, + uint32_t reserved_size) +{ + if (tu_cs_get_space(cs) < reserved_size) { + if (cs->mode == TU_CS_MODE_EXTERNAL) { + unreachable("cannot grow external buffer"); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + /* add an entry for the existing command packets */ + if (!tu_cs_is_empty(cs)) { + /* no direct command packet for TU_CS_MODE_SUB_STREAM */ + assert(cs->mode != TU_CS_MODE_SUB_STREAM); + + tu_cs_add_entry(cs); + } + + /* switch to a new BO */ + uint32_t new_size = MAX2(cs->next_bo_size, reserved_size); + VkResult result = tu_cs_add_bo(dev, cs, new_size); + if (result != VK_SUCCESS) + return result; + + /* double the size for the next bo */ + new_size <<= 1; + if (cs->next_bo_size < new_size) + cs->next_bo_size = new_size; + } + + assert(tu_cs_get_space(cs) >= reserved_size); + cs->reserved_end = cs->cur + reserved_size; + + if (cs->mode == TU_CS_MODE_GROW) { + /* reserve an entry for the next call to this function or tu_cs_end */ + return tu_cs_reserve_entry(dev, cs); + } + + return VK_SUCCESS; +} + +/** + * Reset a command stream to its initial state. This discards all command + * packets in \a cs, but does not necessarily release all resources. + */ +void +tu_cs_reset(struct tu_device *dev, struct tu_cs *cs) +{ + if (cs->mode == TU_CS_MODE_EXTERNAL) { + assert(!cs->bo_count && !cs->entry_count); + cs->reserved_end = cs->cur = cs->start; + return; + } + + for (uint32_t i = 0; i + 1 < cs->bo_count; ++i) { + tu_bo_finish(dev, cs->bos[i]); + free(cs->bos[i]); + } + + if (cs->bo_count) { + cs->bos[0] = cs->bos[cs->bo_count - 1]; + cs->bo_count = 1; + + cs->start = cs->cur = cs->reserved_end = (uint32_t *) cs->bos[0]->map; + cs->end = cs->start + cs->bos[0]->size / sizeof(uint32_t); + } + + cs->entry_count = 0; +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_cs.h b/lib/mesa/src/freedreno/vulkan/tu_cs.h new file mode 100644 index 000000000..f3e0ade2a --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_cs.h @@ -0,0 +1,200 @@ +/* + * Copyright © 2019 Google LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef TU_CS_H +#define TU_CS_H + +#include "tu_private.h" + +#include "registers/adreno_pm4.xml.h" + +void +tu_cs_init(struct tu_cs *cs, enum tu_cs_mode mode, uint32_t initial_size); + +void +tu_cs_init_external(struct tu_cs *cs, uint32_t *start, uint32_t *end); + +void +tu_cs_finish(struct tu_device *dev, struct tu_cs *cs); + +void +tu_cs_begin(struct tu_cs *cs); + +void +tu_cs_end(struct tu_cs *cs); + +VkResult +tu_cs_begin_sub_stream(struct tu_device *dev, + struct tu_cs *cs, + uint32_t size, + struct tu_cs *sub_cs); + +struct tu_cs_entry +tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs); + +VkResult +tu_cs_reserve_space(struct tu_device *dev, + struct tu_cs *cs, + uint32_t reserved_size); + +void +tu_cs_reset(struct tu_device *dev, struct tu_cs *cs); + +/** + * Discard all entries. This allows \a cs to be reused while keeping the + * existing BOs and command packets intact. + */ +static inline void +tu_cs_discard_entries(struct tu_cs *cs) +{ + assert(cs->mode == TU_CS_MODE_GROW); + cs->entry_count = 0; +} + +/** + * Get the size needed for tu_cs_emit_call. + */ +static inline uint32_t +tu_cs_get_call_size(const struct tu_cs *cs) +{ + assert(cs->mode == TU_CS_MODE_GROW); + /* each CP_INDIRECT_BUFFER needs 4 dwords */ + return cs->entry_count * 4; +} + +/** + * Assert that we did not exceed the reserved space. + */ +static inline void +tu_cs_sanity_check(const struct tu_cs *cs) +{ + assert(cs->start <= cs->cur); + assert(cs->cur <= cs->reserved_end); + assert(cs->reserved_end <= cs->end); +} + +/** + * Emit a uint32_t value into a command stream, without boundary checking. + */ +static inline void +tu_cs_emit(struct tu_cs *cs, uint32_t value) +{ + assert(cs->cur < cs->reserved_end); + *cs->cur = value; + ++cs->cur; +} + +/** + * Emit an array of uint32_t into a command stream, without boundary checking. + */ +static inline void +tu_cs_emit_array(struct tu_cs *cs, const uint32_t *values, uint32_t length) +{ + assert(cs->cur + length <= cs->reserved_end); + memcpy(cs->cur, values, sizeof(uint32_t) * length); + cs->cur += length; +} + +static inline unsigned +tu_odd_parity_bit(unsigned val) +{ + /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel + * note that we want odd parity so 0x6996 is inverted. + */ + val ^= val >> 16; + val ^= val >> 8; + val ^= val >> 4; + val &= 0xf; + return (~0x6996 >> val) & 1; +} + +/** + * Emit a type-4 command packet header into a command stream. + */ +static inline void +tu_cs_emit_pkt4(struct tu_cs *cs, uint16_t regindx, uint16_t cnt) +{ + tu_cs_emit(cs, CP_TYPE4_PKT | cnt | (tu_odd_parity_bit(cnt) << 7) | + ((regindx & 0x3ffff) << 8) | + ((tu_odd_parity_bit(regindx) << 27))); +} + +/** + * Emit a type-7 command packet header into a command stream. + */ +static inline void +tu_cs_emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt) +{ + tu_cs_emit(cs, CP_TYPE7_PKT | cnt | (tu_odd_parity_bit(cnt) << 15) | + ((opcode & 0x7f) << 16) | + ((tu_odd_parity_bit(opcode) << 23))); +} + +static inline void +tu_cs_emit_wfi(struct tu_cs *cs) +{ + tu_cs_emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0); +} + +static inline void +tu_cs_emit_qw(struct tu_cs *cs, uint64_t value) +{ + tu_cs_emit(cs, (uint32_t) value); + tu_cs_emit(cs, (uint32_t) (value >> 32)); +} + +static inline void +tu_cs_emit_write_reg(struct tu_cs *cs, uint16_t reg, uint32_t value) +{ + tu_cs_emit_pkt4(cs, reg, 1); + tu_cs_emit(cs, value); +} + +/** + * Emit a CP_INDIRECT_BUFFER command packet. 
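+ * The packet body is the 64-bit iova of the entry's packets followed by + * their length in dwords.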
+ */ +static inline void +tu_cs_emit_ib(struct tu_cs *cs, const struct tu_cs_entry *entry) +{ + assert(entry->bo); + assert(entry->size && entry->offset + entry->size <= entry->bo->size); + assert(entry->size % sizeof(uint32_t) == 0); + assert(entry->offset % sizeof(uint32_t) == 0); + + tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3); + tu_cs_emit_qw(cs, entry->bo->iova + entry->offset); + tu_cs_emit(cs, entry->size / sizeof(uint32_t)); +} + +/** + * Emit a CP_INDIRECT_BUFFER command packet for each entry in the target + * command stream. + */ +static inline void +tu_cs_emit_call(struct tu_cs *cs, const struct tu_cs *target) +{ + assert(target->mode == TU_CS_MODE_GROW); + for (uint32_t i = 0; i < target->entry_count; i++) + tu_cs_emit_ib(cs, target->entries + i); +} + +#endif /* TU_CS_H */ diff --git a/lib/mesa/src/freedreno/vulkan/tu_descriptor_set.c b/lib/mesa/src/freedreno/vulkan/tu_descriptor_set.c new file mode 100644 index 000000000..0f49d26e2 --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_descriptor_set.c @@ -0,0 +1,570 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include "tu_private.h" + +#include <assert.h> +#include <fcntl.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> + +#include "util/mesa-sha1.h" +#include "vk_util.h" + +static int +binding_compare(const void *av, const void *bv) +{ + const VkDescriptorSetLayoutBinding *a = + (const VkDescriptorSetLayoutBinding *) av; + const VkDescriptorSetLayoutBinding *b = + (const VkDescriptorSetLayoutBinding *) bv; + + return (a->binding < b->binding) ? -1 : (a->binding > b->binding) ? 
1 : 0; +} + +static VkDescriptorSetLayoutBinding * +create_sorted_bindings(const VkDescriptorSetLayoutBinding *bindings, + unsigned count) +{ + VkDescriptorSetLayoutBinding *sorted_bindings = + malloc(count * sizeof(VkDescriptorSetLayoutBinding)); + if (!sorted_bindings) + return NULL; + + memcpy(sorted_bindings, bindings, + count * sizeof(VkDescriptorSetLayoutBinding)); + + qsort(sorted_bindings, count, sizeof(VkDescriptorSetLayoutBinding), + binding_compare); + + return sorted_bindings; +} + +VkResult +tu_CreateDescriptorSetLayout( + VkDevice _device, + const VkDescriptorSetLayoutCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorSetLayout *pSetLayout) +{ + TU_FROM_HANDLE(tu_device, device, _device); + struct tu_descriptor_set_layout *set_layout; + + assert(pCreateInfo->sType == + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + const VkDescriptorSetLayoutBindingFlagsCreateInfoEXT *variable_flags = + vk_find_struct_const( + pCreateInfo->pNext, + DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT); + + uint32_t max_binding = 0; + uint32_t immutable_sampler_count = 0; + for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { + max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding); + if (pCreateInfo->pBindings[j].pImmutableSamplers) + immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount; + } + + uint32_t samplers_offset = + sizeof(struct tu_descriptor_set_layout) + + (max_binding + 1) * sizeof(set_layout->binding[0]); + size_t size = + samplers_offset + immutable_sampler_count * 4 * sizeof(uint32_t); + + set_layout = vk_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!set_layout) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + set_layout->flags = pCreateInfo->flags; + + /* We just allocate all the samplers at the end of the struct */ + uint32_t *samplers = (uint32_t *) &set_layout->binding[max_binding + 1]; + (void) samplers; /* TODO: Use me */ + + VkDescriptorSetLayoutBinding *bindings = create_sorted_bindings( + pCreateInfo->pBindings, pCreateInfo->bindingCount); + if (!bindings) { + vk_free2(&device->alloc, pAllocator, set_layout); + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + set_layout->binding_count = max_binding + 1; + set_layout->shader_stages = 0; + set_layout->dynamic_shader_stages = 0; + set_layout->has_immutable_samplers = false; + set_layout->size = 0; + + memset(set_layout->binding, 0, + size - sizeof(struct tu_descriptor_set_layout)); + + uint32_t buffer_count = 0; + uint32_t dynamic_offset_count = 0; + + for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { + const VkDescriptorSetLayoutBinding *binding = bindings + j; + uint32_t b = binding->binding; + uint32_t alignment; + unsigned binding_buffer_count = 0; + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + assert(!(pCreateInfo->flags & + VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); + set_layout->binding[b].dynamic_offset_count = 1; + set_layout->dynamic_shader_stages |= binding->stageFlags; + set_layout->binding[b].size = 0; + binding_buffer_count = 1; + alignment = 1; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + set_layout->binding[b].size = 16; + binding_buffer_count = 1; + alignment = 16; + break; + case 
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + /* main descriptor + fmask descriptor */ + set_layout->binding[b].size = 64; + binding_buffer_count = 1; + alignment = 32; + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + /* main descriptor + fmask descriptor + sampler */ + set_layout->binding[b].size = 96; + binding_buffer_count = 1; + alignment = 32; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + set_layout->binding[b].size = 16; + alignment = 16; + break; + default: + unreachable("unknown descriptor type\n"); + break; + } + + set_layout->size = align(set_layout->size, alignment); + set_layout->binding[b].type = binding->descriptorType; + set_layout->binding[b].array_size = binding->descriptorCount; + set_layout->binding[b].offset = set_layout->size; + set_layout->binding[b].buffer_offset = buffer_count; + set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count; + + if (variable_flags && binding->binding < variable_flags->bindingCount && + (variable_flags->pBindingFlags[binding->binding] & + VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) { + assert(!binding->pImmutableSamplers); /* Terribly ill defined how + many samplers are valid */ + assert(binding->binding == max_binding); + + set_layout->has_variable_descriptors = true; + } + + if (binding->pImmutableSamplers) { + set_layout->binding[b].immutable_samplers_offset = samplers_offset; + set_layout->has_immutable_samplers = true; + } + + set_layout->size += + binding->descriptorCount * set_layout->binding[b].size; + buffer_count += binding->descriptorCount * binding_buffer_count; + dynamic_offset_count += binding->descriptorCount * + set_layout->binding[b].dynamic_offset_count; + set_layout->shader_stages |= binding->stageFlags; + } + + free(bindings); + + set_layout->buffer_count = buffer_count; + set_layout->dynamic_offset_count = dynamic_offset_count; + + *pSetLayout = tu_descriptor_set_layout_to_handle(set_layout); + + return VK_SUCCESS; +} + +void +tu_DestroyDescriptorSetLayout(VkDevice _device, + VkDescriptorSetLayout _set_layout, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_descriptor_set_layout, set_layout, _set_layout); + + if (!set_layout) + return; + + vk_free2(&device->alloc, pAllocator, set_layout); +} + +void +tu_GetDescriptorSetLayoutSupport( + VkDevice device, + const VkDescriptorSetLayoutCreateInfo *pCreateInfo, + VkDescriptorSetLayoutSupport *pSupport) +{ + VkDescriptorSetLayoutBinding *bindings = create_sorted_bindings( + pCreateInfo->pBindings, pCreateInfo->bindingCount); + if (!bindings) { + pSupport->supported = false; + return; + } + + const VkDescriptorSetLayoutBindingFlagsCreateInfoEXT *variable_flags = + vk_find_struct_const( + pCreateInfo->pNext, + DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT); + VkDescriptorSetVariableDescriptorCountLayoutSupportEXT *variable_count = + vk_find_struct( + (void *) pCreateInfo->pNext, + DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT_EXT); + if (variable_count) { + variable_count->maxVariableDescriptorCount = 0; + } + + bool supported = true; + uint64_t size = 0; + for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) { + const VkDescriptorSetLayoutBinding *binding = bindings + i; + + uint64_t descriptor_size = 0; + uint64_t descriptor_alignment = 1; + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + break; 
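+ /* dynamic buffers consume no descriptor memory, only a dynamic offset, + * so descriptor_size stays 0 */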
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + descriptor_size = 16; + descriptor_alignment = 16; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + descriptor_size = 64; + descriptor_alignment = 32; + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + descriptor_size = 96; + descriptor_alignment = 32; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + descriptor_size = 16; + descriptor_alignment = 16; + break; + default: + unreachable("unknown descriptor type\n"); + break; + } + + if (size && !align_u64(size, descriptor_alignment)) { + supported = false; + } + size = align_u64(size, descriptor_alignment); + + uint64_t max_count = UINT64_MAX; + if (descriptor_size) + max_count = (UINT64_MAX - size) / descriptor_size; + + if (max_count < binding->descriptorCount) { + supported = false; + } + if (variable_flags && binding->binding < variable_flags->bindingCount && + variable_count && + (variable_flags->pBindingFlags[binding->binding] & + VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) { + variable_count->maxVariableDescriptorCount = + MIN2(UINT32_MAX, max_count); + } + size += binding->descriptorCount * descriptor_size; + } + + free(bindings); + + pSupport->supported = supported; +} + +/* + * Pipeline layouts. These have nothing to do with the pipeline. They are + * just multiple descriptor set layouts pasted together. + */ + +VkResult +tu_CreatePipelineLayout(VkDevice _device, + const VkPipelineLayoutCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineLayout *pPipelineLayout) +{ + TU_FROM_HANDLE(tu_device, device, _device); + struct tu_pipeline_layout *layout; + struct mesa_sha1 ctx; + + assert(pCreateInfo->sType == + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); + + layout = vk_alloc2(&device->alloc, pAllocator, sizeof(*layout), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (layout == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + layout->num_sets = pCreateInfo->setLayoutCount; + + unsigned dynamic_offset_count = 0; + + _mesa_sha1_init(&ctx); + for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) { + TU_FROM_HANDLE(tu_descriptor_set_layout, set_layout, + pCreateInfo->pSetLayouts[set]); + layout->set[set].layout = set_layout; + + layout->set[set].dynamic_offset_start = dynamic_offset_count; + for (uint32_t b = 0; b < set_layout->binding_count; b++) { + dynamic_offset_count += set_layout->binding[b].array_size * + set_layout->binding[b].dynamic_offset_count; + if (set_layout->binding[b].immutable_samplers_offset) + _mesa_sha1_update( + &ctx, + tu_immutable_samplers(set_layout, set_layout->binding + b), + set_layout->binding[b].array_size * 4 * sizeof(uint32_t)); + } + _mesa_sha1_update( + &ctx, set_layout->binding, + sizeof(set_layout->binding[0]) * set_layout->binding_count); + } + + layout->dynamic_offset_count = dynamic_offset_count; + layout->push_constant_size = 0; + + for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) { + const VkPushConstantRange *range = pCreateInfo->pPushConstantRanges + i; + layout->push_constant_size = + MAX2(layout->push_constant_size, range->offset + range->size); + } + + layout->push_constant_size = align(layout->push_constant_size, 16); + _mesa_sha1_update(&ctx, &layout->push_constant_size, + sizeof(layout->push_constant_size)); + 
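+ /* Hashing every binding layout, the immutable sampler words and the + * push-constant size gives the pipeline layout a stable identity; the + * digest lands in layout->sha1 below, where it can serve as a key for + * pipeline-cache lookups. */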
_mesa_sha1_final(&ctx, layout->sha1); + *pPipelineLayout = tu_pipeline_layout_to_handle(layout); + + return VK_SUCCESS; +} + +void +tu_DestroyPipelineLayout(VkDevice _device, + VkPipelineLayout _pipelineLayout, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_pipeline_layout, pipeline_layout, _pipelineLayout); + + if (!pipeline_layout) + return; + vk_free2(&device->alloc, pAllocator, pipeline_layout); +} + +#define EMPTY 1 + +VkResult +tu_CreateDescriptorPool(VkDevice _device, + const VkDescriptorPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorPool *pDescriptorPool) +{ + TU_FROM_HANDLE(tu_device, device, _device); + tu_use_args(device); + tu_stub(); + return VK_SUCCESS; +} + +void +tu_DestroyDescriptorPool(VkDevice _device, + VkDescriptorPool _pool, + const VkAllocationCallbacks *pAllocator) +{ +} + +VkResult +tu_ResetDescriptorPool(VkDevice _device, + VkDescriptorPool descriptorPool, + VkDescriptorPoolResetFlags flags) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_descriptor_pool, pool, descriptorPool); + + tu_use_args(device, pool); + tu_stub(); + return VK_SUCCESS; +} + +VkResult +tu_AllocateDescriptorSets(VkDevice _device, + const VkDescriptorSetAllocateInfo *pAllocateInfo, + VkDescriptorSet *pDescriptorSets) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_descriptor_pool, pool, pAllocateInfo->descriptorPool); + + tu_use_args(device, pool); + tu_stub(); + return VK_SUCCESS; +} + +VkResult +tu_FreeDescriptorSets(VkDevice _device, + VkDescriptorPool descriptorPool, + uint32_t count, + const VkDescriptorSet *pDescriptorSets) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_descriptor_pool, pool, descriptorPool); + + tu_use_args(device, pool); + tu_stub(); + return VK_SUCCESS; +} + +void +tu_update_descriptor_sets(struct tu_device *device, + struct tu_cmd_buffer *cmd_buffer, + VkDescriptorSet dstSetOverride, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet *pDescriptorWrites, + uint32_t descriptorCopyCount, + const VkCopyDescriptorSet *pDescriptorCopies) +{ +} + +void +tu_UpdateDescriptorSets(VkDevice _device, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet *pDescriptorWrites, + uint32_t descriptorCopyCount, + const VkCopyDescriptorSet *pDescriptorCopies) +{ + TU_FROM_HANDLE(tu_device, device, _device); + + tu_update_descriptor_sets(device, NULL, VK_NULL_HANDLE, + descriptorWriteCount, pDescriptorWrites, + descriptorCopyCount, pDescriptorCopies); +} + +VkResult +tu_CreateDescriptorUpdateTemplate( + VkDevice _device, + const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_descriptor_set_layout, set_layout, + pCreateInfo->descriptorSetLayout); + const uint32_t entry_count = pCreateInfo->descriptorUpdateEntryCount; + const size_t size = + sizeof(struct tu_descriptor_update_template) + + sizeof(struct tu_descriptor_update_template_entry) * entry_count; + struct tu_descriptor_update_template *templ; + + templ = vk_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!templ) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + *pDescriptorUpdateTemplate = + tu_descriptor_update_template_to_handle(templ); + + tu_use_args(set_layout); + tu_stub(); + return VK_SUCCESS; +} + +void 
+tu_DestroyDescriptorUpdateTemplate( + VkDevice _device, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_descriptor_update_template, templ, + descriptorUpdateTemplate); + + if (!templ) + return; + + vk_free2(&device->alloc, pAllocator, templ); +} + +void +tu_update_descriptor_set_with_template( + struct tu_device *device, + struct tu_cmd_buffer *cmd_buffer, + struct tu_descriptor_set *set, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, + const void *pData) +{ + TU_FROM_HANDLE(tu_descriptor_update_template, templ, + descriptorUpdateTemplate); + tu_use_args(templ); +} + +void +tu_UpdateDescriptorSetWithTemplate( + VkDevice _device, + VkDescriptorSet descriptorSet, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, + const void *pData) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_descriptor_set, set, descriptorSet); + + tu_update_descriptor_set_with_template(device, NULL, set, + descriptorUpdateTemplate, pData); +} + +VkResult +tu_CreateSamplerYcbcrConversion( + VkDevice device, + const VkSamplerYcbcrConversionCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSamplerYcbcrConversion *pYcbcrConversion) +{ + *pYcbcrConversion = VK_NULL_HANDLE; + return VK_SUCCESS; +} + +void +tu_DestroySamplerYcbcrConversion(VkDevice device, + VkSamplerYcbcrConversion ycbcrConversion, + const VkAllocationCallbacks *pAllocator) +{ + /* Do nothing. */ +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_descriptor_set.h b/lib/mesa/src/freedreno/vulkan/tu_descriptor_set.h new file mode 100644 index 000000000..5692e11b1 --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_descriptor_set.h @@ -0,0 +1,102 @@ +/* + * Copyright © 2016 Bas Nieuwenhuizen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef TU_DESCRIPTOR_SET_H +#define TU_DESCRIPTOR_SET_H + +#include <vulkan/vulkan.h> + +#define MAX_SETS 32 + +struct tu_descriptor_set_binding_layout +{ + VkDescriptorType type; + + /* Number of array elements in this binding */ + uint32_t array_size; + + uint32_t offset; + uint32_t buffer_offset; + uint16_t dynamic_offset_offset; + + uint16_t dynamic_offset_count; + /* redundant with the type, each for a single array element */ + uint32_t size; + + /* Offset in the tu_descriptor_set_layout of the immutable samplers, or 0 + * if there are no immutable samplers. 
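+ * The sampler words are stored inline after the binding[] array; + * tu_immutable_samplers() below simply adds this byte offset to the + * layout base pointer, which is why 0 can double as the "none" sentinel.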
*/ + uint32_t immutable_samplers_offset; +}; + +struct tu_descriptor_set_layout +{ + /* The create flags for this descriptor set layout */ + VkDescriptorSetLayoutCreateFlags flags; + + /* Number of bindings in this descriptor set */ + uint32_t binding_count; + + /* Total size of the descriptor set with room for all array entries */ + uint32_t size; + + /* Shader stages affected by this descriptor set */ + uint16_t shader_stages; + uint16_t dynamic_shader_stages; + + /* Number of buffers in this descriptor set */ + uint32_t buffer_count; + + /* Number of dynamic offsets used by this descriptor set */ + uint16_t dynamic_offset_count; + + bool has_immutable_samplers; + bool has_variable_descriptors; + + /* Bindings in this descriptor set */ + struct tu_descriptor_set_binding_layout binding[0]; +}; + +struct tu_pipeline_layout +{ + struct + { + struct tu_descriptor_set_layout *layout; + uint32_t size; + uint32_t dynamic_offset_start; + } set[MAX_SETS]; + + uint32_t num_sets; + uint32_t push_constant_size; + uint32_t dynamic_offset_count; + + unsigned char sha1[20]; +}; + +static inline const uint32_t * +tu_immutable_samplers(const struct tu_descriptor_set_layout *set, + const struct tu_descriptor_set_binding_layout *binding) +{ + return (const uint32_t *) ((const char *) set + + binding->immutable_samplers_offset); +} +#endif /* TU_DESCRIPTOR_SET_H */ diff --git a/lib/mesa/src/freedreno/vulkan/tu_device.c b/lib/mesa/src/freedreno/vulkan/tu_device.c new file mode 100644 index 000000000..901f02486 --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_device.c @@ -0,0 +1,2071 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * based in part on anv driver which is: + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "tu_private.h" + +#include <fcntl.h> +#include <libsync.h> +#include <stdbool.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/sysinfo.h> +#include <unistd.h> +#include <xf86drm.h> + +#include "compiler/glsl_types.h" +#include "util/debug.h" +#include "util/disk_cache.h" +#include "vk_format.h" +#include "vk_util.h" + +#include "drm-uapi/msm_drm.h" + +static int +tu_device_get_cache_uuid(uint16_t family, void *uuid) +{ + uint32_t mesa_timestamp; + uint16_t f = family; + memset(uuid, 0, VK_UUID_SIZE); + if (!disk_cache_get_function_timestamp(tu_device_get_cache_uuid, + &mesa_timestamp)) + return -1; + + memcpy(uuid, &mesa_timestamp, 4); + memcpy((char *) uuid + 4, &f, 2); + snprintf((char *) uuid + 6, VK_UUID_SIZE - 10, "tu"); + return 0; +} + +static void +tu_get_driver_uuid(void *uuid) +{ + memset(uuid, 0, VK_UUID_SIZE); + snprintf(uuid, VK_UUID_SIZE, "freedreno"); +} + +static void +tu_get_device_uuid(void *uuid) +{ + memset(uuid, 0, VK_UUID_SIZE); +} + +static VkResult +tu_bo_init(struct tu_device *dev, + struct tu_bo *bo, + uint32_t gem_handle, + uint64_t size) +{ + uint64_t iova = tu_gem_info_iova(dev, gem_handle); + if (!iova) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + *bo = (struct tu_bo) { + .gem_handle = gem_handle, + .size = size, + .iova = iova, + }; + + return VK_SUCCESS; +} + +VkResult +tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size) +{ + /* TODO: Choose better flags. As of 2018-11-12, freedreno/drm/msm_bo.c + * always sets `flags = MSM_BO_WC`, and we copy that behavior here. + */ + uint32_t gem_handle = tu_gem_new(dev, size, MSM_BO_WC); + if (!gem_handle) + return vk_error(dev->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + VkResult result = tu_bo_init(dev, bo, gem_handle, size); + if (result != VK_SUCCESS) { + tu_gem_close(dev, gem_handle); + return vk_error(dev->instance, result); + } + + return VK_SUCCESS; +} + +VkResult +tu_bo_init_dmabuf(struct tu_device *dev, + struct tu_bo *bo, + uint64_t size, + int fd) +{ + uint32_t gem_handle = tu_gem_import_dmabuf(dev, fd, size); + if (!gem_handle) + return vk_error(dev->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); + + VkResult result = tu_bo_init(dev, bo, gem_handle, size); + if (result != VK_SUCCESS) { + tu_gem_close(dev, gem_handle); + return vk_error(dev->instance, result); + } + + return VK_SUCCESS; +} + +int +tu_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo) +{ + return tu_gem_export_dmabuf(dev, bo->gem_handle); +} + +VkResult +tu_bo_map(struct tu_device *dev, struct tu_bo *bo) +{ + if (bo->map) + return VK_SUCCESS; + + uint64_t offset = tu_gem_info_offset(dev, bo->gem_handle); + if (!offset) + return vk_error(dev->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + /* TODO: Should we use the wrapper os_mmap() like Freedreno does? 
*/ + void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + dev->physical_device->local_fd, offset); + if (map == MAP_FAILED) + return vk_error(dev->instance, VK_ERROR_MEMORY_MAP_FAILED); + + bo->map = map; + return VK_SUCCESS; +} + +void +tu_bo_finish(struct tu_device *dev, struct tu_bo *bo) +{ + assert(bo->gem_handle); + + if (bo->map) + munmap(bo->map, bo->size); + + tu_gem_close(dev, bo->gem_handle); +} + +static VkResult +tu_physical_device_init(struct tu_physical_device *device, + struct tu_instance *instance, + drmDevicePtr drm_device) +{ + const char *path = drm_device->nodes[DRM_NODE_RENDER]; + VkResult result = VK_SUCCESS; + drmVersionPtr version; + int fd; + int master_fd = -1; + + fd = open(path, O_RDWR | O_CLOEXEC); + if (fd < 0) { + return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, + "failed to open device %s", path); + } + + /* Version 1.3 added MSM_INFO_IOVA. */ + const int min_version_major = 1; + const int min_version_minor = 3; + + version = drmGetVersion(fd); + if (!version) { + close(fd); + return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, + "failed to query kernel driver version for device %s", + path); + } + + if (strcmp(version->name, "msm")) { + drmFreeVersion(version); + close(fd); + return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, + "device %s does not use the msm kernel driver", path); + } + + if (version->version_major != min_version_major || + version->version_minor < min_version_minor) { + result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, + "kernel driver for device %s has version %d.%d, " + "but Vulkan requires version >= %d.%d", + path, version->version_major, version->version_minor, + min_version_major, min_version_minor); + drmFreeVersion(version); + close(fd); + return result; + } + + drmFreeVersion(version); + + if (instance->debug_flags & TU_DEBUG_STARTUP) + tu_logi("Found compatible device '%s'.", path); + + device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + device->instance = instance; + assert(strlen(path) < ARRAY_SIZE(device->path)); + strncpy(device->path, path, ARRAY_SIZE(device->path)); + + if (instance->enabled_extensions.KHR_display) { + master_fd = + open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC); + if (master_fd >= 0) { + /* TODO: free master_fd if accel is not working? */ + } + } + + device->master_fd = master_fd; + device->local_fd = fd; + + if (tu_drm_get_gpu_id(device, &device->gpu_id)) { + if (instance->debug_flags & TU_DEBUG_STARTUP) + tu_logi("Could not query the GPU ID"); + result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, + "could not get GPU ID"); + goto fail; + } + + if (tu_drm_get_gmem_size(device, &device->gmem_size)) { + if (instance->debug_flags & TU_DEBUG_STARTUP) + tu_logi("Could not query the GMEM size"); + result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, + "could not get GMEM size"); + goto fail; + } + + memset(device->name, 0, sizeof(device->name)); + sprintf(device->name, "FD%d", device->gpu_id); + + switch (device->gpu_id) { + case 630: + device->tile_align_w = 32; + device->tile_align_h = 32; + break; + default: + result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, + "device %s is unsupported", device->name); + goto fail; + } + if (tu_device_get_cache_uuid(device->gpu_id, device->cache_uuid)) { + result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, + "cannot generate UUID"); + goto fail; + } + + /* The gpu id is already embedded in the uuid so we just pass "tu" + * when creating the cache.
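+ * (tu_device_get_cache_uuid() above mixed the Mesa build timestamp and + * the GPU family into that uuid, so caches from different Mesa builds or + * GPU families should never be confused.)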
+ */ + char buf[VK_UUID_SIZE * 2 + 1]; + disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2); + device->disk_cache = disk_cache_create(device->name, buf, 0); + + fprintf(stderr, "WARNING: tu is not a conformant vulkan implementation, " + "testing use only.\n"); + + tu_get_driver_uuid(&device->driver_uuid); + tu_get_device_uuid(&device->device_uuid); + + tu_fill_device_extension_table(device, &device->supported_extensions); + + if (result != VK_SUCCESS) { + vk_error(instance, result); + goto fail; + } + + result = tu_wsi_init(device); + if (result != VK_SUCCESS) { + vk_error(instance, result); + goto fail; + } + + return VK_SUCCESS; + +fail: + close(fd); + if (master_fd != -1) + close(master_fd); + return result; +} + +static void +tu_physical_device_finish(struct tu_physical_device *device) +{ + tu_wsi_finish(device); + + disk_cache_destroy(device->disk_cache); + close(device->local_fd); + if (device->master_fd != -1) + close(device->master_fd); +} + +static void * +default_alloc_func(void *pUserData, + size_t size, + size_t align, + VkSystemAllocationScope allocationScope) +{ + return malloc(size); +} + +static void * +default_realloc_func(void *pUserData, + void *pOriginal, + size_t size, + size_t align, + VkSystemAllocationScope allocationScope) +{ + return realloc(pOriginal, size); +} + +static void +default_free_func(void *pUserData, void *pMemory) +{ + free(pMemory); +} + +static const VkAllocationCallbacks default_alloc = { + .pUserData = NULL, + .pfnAllocation = default_alloc_func, + .pfnReallocation = default_realloc_func, + .pfnFree = default_free_func, +}; + +static const struct debug_control tu_debug_options[] = { + { "startup", TU_DEBUG_STARTUP }, + { "nir", TU_DEBUG_NIR }, + { "ir3", TU_DEBUG_IR3 }, + { NULL, 0 } +}; + +const char * +tu_get_debug_option_name(int id) +{ + assert(id < ARRAY_SIZE(tu_debug_options) - 1); + return tu_debug_options[id].string; +} + +static int +tu_get_instance_extension_index(const char *name) +{ + for (unsigned i = 0; i < TU_INSTANCE_EXTENSION_COUNT; ++i) { + if (strcmp(name, tu_instance_extensions[i].extensionName) == 0) + return i; + } + return -1; +} + +VkResult +tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkInstance *pInstance) +{ + struct tu_instance *instance; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); + + uint32_t client_version; + if (pCreateInfo->pApplicationInfo && + pCreateInfo->pApplicationInfo->apiVersion != 0) { + client_version = pCreateInfo->pApplicationInfo->apiVersion; + } else { + tu_EnumerateInstanceVersion(&client_version); + } + + instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!instance) + return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); + + instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + + if (pAllocator) + instance->alloc = *pAllocator; + else + instance->alloc = default_alloc; + + instance->api_version = client_version; + instance->physical_device_count = -1; + + instance->debug_flags = + parse_debug_string(getenv("TU_DEBUG"), tu_debug_options); + + if (instance->debug_flags & TU_DEBUG_STARTUP) + tu_logi("Created an instance"); + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i]; + int index = tu_get_instance_extension_index(ext_name); + + if (index < 0 || !tu_supported_instance_extensions.extensions[index]) { +
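+ /* Unknown or not-compiled-in extension: undo the instance allocation + * and fail with VK_ERROR_EXTENSION_NOT_PRESENT, as the spec requires. */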
vk_free2(&default_alloc, pAllocator, instance); + return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT); + } + + instance->enabled_extensions.extensions[index] = true; + } + + result = vk_debug_report_instance_init(&instance->debug_report_callbacks); + if (result != VK_SUCCESS) { + vk_free2(&default_alloc, pAllocator, instance); + return vk_error(instance, result); + } + + glsl_type_singleton_init_or_ref(); + + VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); + + *pInstance = tu_instance_to_handle(instance); + + return VK_SUCCESS; +} + +void +tu_DestroyInstance(VkInstance _instance, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_instance, instance, _instance); + + if (!instance) + return; + + for (int i = 0; i < instance->physical_device_count; ++i) { + tu_physical_device_finish(instance->physical_devices + i); + } + + VG(VALGRIND_DESTROY_MEMPOOL(instance)); + + glsl_type_singleton_decref(); + + vk_debug_report_instance_destroy(&instance->debug_report_callbacks); + + vk_free(&instance->alloc, instance); +} + +static VkResult +tu_enumerate_devices(struct tu_instance *instance) +{ + /* TODO: Check for more devices ? */ + drmDevicePtr devices[8]; + VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER; + int max_devices; + + instance->physical_device_count = 0; + + max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices)); + + if (instance->debug_flags & TU_DEBUG_STARTUP) + tu_logi("Found %d drm nodes", max_devices); + + if (max_devices < 1) + return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER); + + for (unsigned i = 0; i < (unsigned) max_devices; i++) { + if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER && + devices[i]->bustype == DRM_BUS_PLATFORM) { + + result = tu_physical_device_init( + instance->physical_devices + instance->physical_device_count, + instance, devices[i]); + if (result == VK_SUCCESS) + ++instance->physical_device_count; + else if (result != VK_ERROR_INCOMPATIBLE_DRIVER) + break; + } + } + drmFreeDevices(devices, max_devices); + + return result; +} + +VkResult +tu_EnumeratePhysicalDevices(VkInstance _instance, + uint32_t *pPhysicalDeviceCount, + VkPhysicalDevice *pPhysicalDevices) +{ + TU_FROM_HANDLE(tu_instance, instance, _instance); + VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount); + + VkResult result; + + if (instance->physical_device_count < 0) { + result = tu_enumerate_devices(instance); + if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER) + return result; + } + + for (uint32_t i = 0; i < instance->physical_device_count; ++i) { + vk_outarray_append(&out, p) + { + *p = tu_physical_device_to_handle(instance->physical_devices + i); + } + } + + return vk_outarray_status(&out); +} + +VkResult +tu_EnumeratePhysicalDeviceGroups( + VkInstance _instance, + uint32_t *pPhysicalDeviceGroupCount, + VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties) +{ + TU_FROM_HANDLE(tu_instance, instance, _instance); + VK_OUTARRAY_MAKE(out, pPhysicalDeviceGroupProperties, + pPhysicalDeviceGroupCount); + VkResult result; + + if (instance->physical_device_count < 0) { + result = tu_enumerate_devices(instance); + if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER) + return result; + } + + for (uint32_t i = 0; i < instance->physical_device_count; ++i) { + vk_outarray_append(&out, p) + { + p->physicalDeviceCount = 1; + p->physicalDevices[0] = + tu_physical_device_to_handle(instance->physical_devices + i); + p->subsetAllocation = false; + } + } + + return vk_outarray_status(&out); +} + +void 
+tu_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures *pFeatures) +{ + memset(pFeatures, 0, sizeof(*pFeatures)); + + *pFeatures = (VkPhysicalDeviceFeatures) { + .robustBufferAccess = false, + .fullDrawIndexUint32 = false, + .imageCubeArray = false, + .independentBlend = false, + .geometryShader = false, + .tessellationShader = false, + .sampleRateShading = false, + .dualSrcBlend = false, + .logicOp = false, + .multiDrawIndirect = false, + .drawIndirectFirstInstance = false, + .depthClamp = false, + .depthBiasClamp = false, + .fillModeNonSolid = false, + .depthBounds = false, + .wideLines = false, + .largePoints = false, + .alphaToOne = false, + .multiViewport = false, + .samplerAnisotropy = false, + .textureCompressionETC2 = false, + .textureCompressionASTC_LDR = false, + .textureCompressionBC = false, + .occlusionQueryPrecise = false, + .pipelineStatisticsQuery = false, + .vertexPipelineStoresAndAtomics = false, + .fragmentStoresAndAtomics = false, + .shaderTessellationAndGeometryPointSize = false, + .shaderImageGatherExtended = false, + .shaderStorageImageExtendedFormats = false, + .shaderStorageImageMultisample = false, + .shaderUniformBufferArrayDynamicIndexing = false, + .shaderSampledImageArrayDynamicIndexing = false, + .shaderStorageBufferArrayDynamicIndexing = false, + .shaderStorageImageArrayDynamicIndexing = false, + .shaderStorageImageReadWithoutFormat = false, + .shaderStorageImageWriteWithoutFormat = false, + .shaderClipDistance = false, + .shaderCullDistance = false, + .shaderFloat64 = false, + .shaderInt64 = false, + .shaderInt16 = false, + .sparseBinding = false, + .variableMultisampleRate = false, + .inheritedQueries = false, + }; +} + +void +tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2 *pFeatures) +{ + vk_foreach_struct(ext, pFeatures->pNext) + { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: { + VkPhysicalDeviceVariablePointersFeatures *features = (void *) ext; + features->variablePointersStorageBuffer = false; + features->variablePointers = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: { + VkPhysicalDeviceMultiviewFeatures *features = + (VkPhysicalDeviceMultiviewFeatures *) ext; + features->multiview = false; + features->multiviewGeometryShader = false; + features->multiviewTessellationShader = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: { + VkPhysicalDeviceShaderDrawParametersFeatures *features = + (VkPhysicalDeviceShaderDrawParametersFeatures *) ext; + features->shaderDrawParameters = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: { + VkPhysicalDeviceProtectedMemoryFeatures *features = + (VkPhysicalDeviceProtectedMemoryFeatures *) ext; + features->protectedMemory = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { + VkPhysicalDevice16BitStorageFeatures *features = + (VkPhysicalDevice16BitStorageFeatures *) ext; + features->storageBuffer16BitAccess = false; + features->uniformAndStorageBuffer16BitAccess = false; + features->storagePushConstant16 = false; + features->storageInputOutput16 = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: { + VkPhysicalDeviceSamplerYcbcrConversionFeatures *features = + (VkPhysicalDeviceSamplerYcbcrConversionFeatures *) ext; + features->samplerYcbcrConversion = false; + break; + } + case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: { + VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features = + (VkPhysicalDeviceDescriptorIndexingFeaturesEXT *) ext; + features->shaderInputAttachmentArrayDynamicIndexing = false; + features->shaderUniformTexelBufferArrayDynamicIndexing = false; + features->shaderStorageTexelBufferArrayDynamicIndexing = false; + features->shaderUniformBufferArrayNonUniformIndexing = false; + features->shaderSampledImageArrayNonUniformIndexing = false; + features->shaderStorageBufferArrayNonUniformIndexing = false; + features->shaderStorageImageArrayNonUniformIndexing = false; + features->shaderInputAttachmentArrayNonUniformIndexing = false; + features->shaderUniformTexelBufferArrayNonUniformIndexing = false; + features->shaderStorageTexelBufferArrayNonUniformIndexing = false; + features->descriptorBindingUniformBufferUpdateAfterBind = false; + features->descriptorBindingSampledImageUpdateAfterBind = false; + features->descriptorBindingStorageImageUpdateAfterBind = false; + features->descriptorBindingStorageBufferUpdateAfterBind = false; + features->descriptorBindingUniformTexelBufferUpdateAfterBind = false; + features->descriptorBindingStorageTexelBufferUpdateAfterBind = false; + features->descriptorBindingUpdateUnusedWhilePending = false; + features->descriptorBindingPartiallyBound = false; + features->descriptorBindingVariableDescriptorCount = false; + features->runtimeDescriptorArray = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: { + VkPhysicalDeviceConditionalRenderingFeaturesEXT *features = + (VkPhysicalDeviceConditionalRenderingFeaturesEXT *) ext; + features->conditionalRendering = false; + features->inheritedConditionalRendering = false; + break; + } + default: + break; + } + } + return tu_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features); +} + +void +tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties *pProperties) +{ + TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice); + VkSampleCountFlags sample_counts = 0xf; + + /* Make sure that the entire descriptor set is addressable with a signed + * 32-bit int, so the sum of all limits scaled by descriptor size has to + * be at most 2 GiB. A combined image & sampler counts against both the + * image and sampler limits. This limit applies to the pipeline layout, + * not to the set layout, but since there is no per-set limit we simply + * advertise a pipeline-wide limit; no application is expected to hit it + * any time soon.
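+ * With the per-type sizes used below (32 B each for uniform buffer, + * storage buffer and sampler, 64 B each for sampled and storage image, + * 224 B in the worst case) that is roughly 2^31 / 224, i.e. about + * 9.6 million descriptors per stage, ignoring the small dynamic-buffer + * reservation.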
*/ + size_t max_descriptor_set_size = + ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) / + (32 /* uniform buffer, 32 due to potential space wasted on alignment */ + + 32 /* storage buffer, 32 due to potential space wasted on alignment */ + + 32 /* sampler, largest when combined with image */ + + 64 /* sampled image */ + 64 /* storage image */); + + VkPhysicalDeviceLimits limits = { + .maxImageDimension1D = (1 << 14), + .maxImageDimension2D = (1 << 14), + .maxImageDimension3D = (1 << 11), + .maxImageDimensionCube = (1 << 14), + .maxImageArrayLayers = (1 << 11), + .maxTexelBufferElements = 128 * 1024 * 1024, + .maxUniformBufferRange = UINT32_MAX, + .maxStorageBufferRange = UINT32_MAX, + .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, + .maxMemoryAllocationCount = UINT32_MAX, + .maxSamplerAllocationCount = 64 * 1024, + .bufferImageGranularity = 64, /* A cache line */ + .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */ + .maxBoundDescriptorSets = MAX_SETS, + .maxPerStageDescriptorSamplers = max_descriptor_set_size, + .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size, + .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size, + .maxPerStageDescriptorSampledImages = max_descriptor_set_size, + .maxPerStageDescriptorStorageImages = max_descriptor_set_size, + .maxPerStageDescriptorInputAttachments = max_descriptor_set_size, + .maxPerStageResources = max_descriptor_set_size, + .maxDescriptorSetSamplers = max_descriptor_set_size, + .maxDescriptorSetUniformBuffers = max_descriptor_set_size, + .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS, + .maxDescriptorSetStorageBuffers = max_descriptor_set_size, + .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS, + .maxDescriptorSetSampledImages = max_descriptor_set_size, + .maxDescriptorSetStorageImages = max_descriptor_set_size, + .maxDescriptorSetInputAttachments = max_descriptor_set_size, + .maxVertexInputAttributes = 32, + .maxVertexInputBindings = 32, + .maxVertexInputAttributeOffset = 2047, + .maxVertexInputBindingStride = 2048, + .maxVertexOutputComponents = 128, + .maxTessellationGenerationLevel = 64, + .maxTessellationPatchSize = 32, + .maxTessellationControlPerVertexInputComponents = 128, + .maxTessellationControlPerVertexOutputComponents = 128, + .maxTessellationControlPerPatchOutputComponents = 120, + .maxTessellationControlTotalOutputComponents = 4096, + .maxTessellationEvaluationInputComponents = 128, + .maxTessellationEvaluationOutputComponents = 128, + .maxGeometryShaderInvocations = 127, + .maxGeometryInputComponents = 64, + .maxGeometryOutputComponents = 128, + .maxGeometryOutputVertices = 256, + .maxGeometryTotalOutputComponents = 1024, + .maxFragmentInputComponents = 128, + .maxFragmentOutputAttachments = 8, + .maxFragmentDualSrcAttachments = 1, + .maxFragmentCombinedOutputResources = 8, + .maxComputeSharedMemorySize = 32768, + .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, + .maxComputeWorkGroupInvocations = 2048, + .maxComputeWorkGroupSize = { 2048, 2048, 2048 }, + .subPixelPrecisionBits = 4 /* FIXME */, + .subTexelPrecisionBits = 4 /* FIXME */, + .mipmapPrecisionBits = 4 /* FIXME */, + .maxDrawIndexedIndexValue = UINT32_MAX, + .maxDrawIndirectCount = UINT32_MAX, + .maxSamplerLodBias = 16, + .maxSamplerAnisotropy = 16, + .maxViewports = MAX_VIEWPORTS, + .maxViewportDimensions = { (1 << 14), (1 << 14) }, + .viewportBoundsRange = { INT16_MIN, INT16_MAX }, + .viewportSubPixelBits = 8, + .minMemoryMapAlignment = 4096, /* A page */ + .minTexelBufferOffsetAlignment = 1, + 
.minUniformBufferOffsetAlignment = 4, + .minStorageBufferOffsetAlignment = 4, + .minTexelOffset = -32, + .maxTexelOffset = 31, + .minTexelGatherOffset = -32, + .maxTexelGatherOffset = 31, + .minInterpolationOffset = -2, + .maxInterpolationOffset = 2, + .subPixelInterpolationOffsetBits = 8, + .maxFramebufferWidth = (1 << 14), + .maxFramebufferHeight = (1 << 14), + .maxFramebufferLayers = (1 << 10), + .framebufferColorSampleCounts = sample_counts, + .framebufferDepthSampleCounts = sample_counts, + .framebufferStencilSampleCounts = sample_counts, + .framebufferNoAttachmentsSampleCounts = sample_counts, + .maxColorAttachments = MAX_RTS, + .sampledImageColorSampleCounts = sample_counts, + .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .sampledImageDepthSampleCounts = sample_counts, + .sampledImageStencilSampleCounts = sample_counts, + .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .maxSampleMaskWords = 1, + .timestampComputeAndGraphics = true, + .timestampPeriod = 1, + .maxClipDistances = 8, + .maxCullDistances = 8, + .maxCombinedClipAndCullDistances = 8, + .discreteQueuePriorities = 1, + .pointSizeRange = { 0.125, 255.875 }, + .lineWidthRange = { 0.0, 7.9921875 }, + .pointSizeGranularity = (1.0 / 8.0), + .lineWidthGranularity = (1.0 / 128.0), + .strictLines = false, /* FINISHME */ + .standardSampleLocations = true, + .optimalBufferCopyOffsetAlignment = 128, + .optimalBufferCopyRowPitchAlignment = 128, + .nonCoherentAtomSize = 64, + }; + + *pProperties = (VkPhysicalDeviceProperties) { + .apiVersion = tu_physical_device_api_version(pdevice), + .driverVersion = vk_get_driver_version(), + .vendorID = 0, /* TODO */ + .deviceID = 0, + .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + .limits = limits, + .sparseProperties = { 0 }, + }; + + strcpy(pProperties->deviceName, pdevice->name); + memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE); +} + +void +tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties2 *pProperties) +{ + TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice); + tu_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties); + + vk_foreach_struct(ext, pProperties->pNext) + { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: { + VkPhysicalDevicePushDescriptorPropertiesKHR *properties = + (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext; + properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: { + VkPhysicalDeviceIDProperties *properties = + (VkPhysicalDeviceIDProperties *) ext; + memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE); + memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE); + properties->deviceLUIDValid = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: { + VkPhysicalDeviceMultiviewProperties *properties = + (VkPhysicalDeviceMultiviewProperties *) ext; + properties->maxMultiviewViewCount = MAX_VIEWS; + properties->maxMultiviewInstanceIndex = INT_MAX; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: { + VkPhysicalDevicePointClippingProperties *properties = + (VkPhysicalDevicePointClippingProperties *) ext; + properties->pointClippingBehavior = + VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: { + VkPhysicalDeviceMaintenance3Properties *properties = + 
(VkPhysicalDeviceMaintenance3Properties *) ext; + /* Make sure everything is addressable by a signed 32-bit int, and + * our largest descriptors are 96 bytes. */ + properties->maxPerSetDescriptors = (1ull << 31) / 96; + /* Our buffer size fields allow only this much */ + properties->maxMemoryAllocationSize = 0xFFFFFFFFull; + break; + } + default: + break; + } + } +} + +static const VkQueueFamilyProperties tu_queue_family_properties = { + .queueFlags = + VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT, + .queueCount = 1, + .timestampValidBits = 64, + .minImageTransferGranularity = { 1, 1, 1 }, +}; + +void +tu_GetPhysicalDeviceQueueFamilyProperties( + VkPhysicalDevice physicalDevice, + uint32_t *pQueueFamilyPropertyCount, + VkQueueFamilyProperties *pQueueFamilyProperties) +{ + VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount); + + vk_outarray_append(&out, p) { *p = tu_queue_family_properties; } +} + +void +tu_GetPhysicalDeviceQueueFamilyProperties2( + VkPhysicalDevice physicalDevice, + uint32_t *pQueueFamilyPropertyCount, + VkQueueFamilyProperties2 *pQueueFamilyProperties) +{ + VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount); + + vk_outarray_append(&out, p) + { + p->queueFamilyProperties = tu_queue_family_properties; + } +} + +static uint64_t +tu_get_system_heap_size() +{ + struct sysinfo info; + sysinfo(&info); + + uint64_t total_ram = (uint64_t) info.totalram * (uint64_t) info.mem_unit; + + /* We don't want to burn too much ram with the GPU. If the user has 4GiB + * or less, we use at most half. If they have more than 4GiB, we use 3/4. + */ + uint64_t available_ram; + if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull) + available_ram = total_ram / 2; + else + available_ram = total_ram * 3 / 4; + + return available_ram; +} + +void +tu_GetPhysicalDeviceMemoryProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties *pMemoryProperties) +{ + pMemoryProperties->memoryHeapCount = 1; + pMemoryProperties->memoryHeaps[0].size = tu_get_system_heap_size(); + pMemoryProperties->memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; + + pMemoryProperties->memoryTypeCount = 1; + pMemoryProperties->memoryTypes[0].propertyFlags = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + pMemoryProperties->memoryTypes[0].heapIndex = 0; +} + +void +tu_GetPhysicalDeviceMemoryProperties2( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) +{ + return tu_GetPhysicalDeviceMemoryProperties( + physicalDevice, &pMemoryProperties->memoryProperties); +} + +static VkResult +tu_queue_init(struct tu_device *device, + struct tu_queue *queue, + uint32_t queue_family_index, + int idx, + VkDeviceQueueCreateFlags flags) +{ + queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + queue->device = device; + queue->queue_family_index = queue_family_index; + queue->queue_idx = idx; + queue->flags = flags; + + int ret = tu_drm_submitqueue_new(device, 0, &queue->msm_queue_id); + if (ret) + return VK_ERROR_INITIALIZATION_FAILED; + + tu_fence_init(&queue->submit_fence, false); + + return VK_SUCCESS; +} + +static void +tu_queue_finish(struct tu_queue *queue) +{ + tu_fence_finish(&queue->submit_fence); + tu_drm_submitqueue_close(queue->device, queue->msm_queue_id); +} + +static int +tu_get_device_extension_index(const char *name) +{ + for (unsigned i = 0; i < TU_DEVICE_EXTENSION_COUNT; ++i) { + if (strcmp(name, 
tu_device_extensions[i].extensionName) == 0) + return i; + } + return -1; +} + +VkResult +tu_CreateDevice(VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDevice *pDevice) +{ + TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice); + VkResult result; + struct tu_device *device; + + /* Check enabled features */ + if (pCreateInfo->pEnabledFeatures) { + VkPhysicalDeviceFeatures supported_features; + tu_GetPhysicalDeviceFeatures(physicalDevice, &supported_features); + VkBool32 *supported_feature = (VkBool32 *) &supported_features; + VkBool32 *enabled_feature = (VkBool32 *) pCreateInfo->pEnabledFeatures; + unsigned num_features = + sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32); + for (uint32_t i = 0; i < num_features; i++) { + if (enabled_feature[i] && !supported_feature[i]) + return vk_error(physical_device->instance, + VK_ERROR_FEATURE_NOT_PRESENT); + } + } + + device = vk_zalloc2(&physical_device->instance->alloc, pAllocator, + sizeof(*device), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!device) + return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + device->instance = physical_device->instance; + device->physical_device = physical_device; + + if (pAllocator) + device->alloc = *pAllocator; + else + device->alloc = physical_device->instance->alloc; + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i]; + int index = tu_get_device_extension_index(ext_name); + if (index < 0 || + !physical_device->supported_extensions.extensions[index]) { + vk_free(&device->alloc, device); + return vk_error(physical_device->instance, + VK_ERROR_EXTENSION_NOT_PRESENT); + } + + device->enabled_extensions.extensions[index] = true; + } + + for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { + const VkDeviceQueueCreateInfo *queue_create = + &pCreateInfo->pQueueCreateInfos[i]; + uint32_t qfi = queue_create->queueFamilyIndex; + device->queues[qfi] = vk_alloc( + &device->alloc, queue_create->queueCount * sizeof(struct tu_queue), + 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!device->queues[qfi]) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } + + memset(device->queues[qfi], 0, + queue_create->queueCount * sizeof(struct tu_queue)); + + device->queue_count[qfi] = queue_create->queueCount; + + for (unsigned q = 0; q < queue_create->queueCount; q++) { + result = tu_queue_init(device, &device->queues[qfi][q], qfi, q, + queue_create->flags); + if (result != VK_SUCCESS) + goto fail; + } + } + + device->compiler = ir3_compiler_create(NULL, physical_device->gpu_id); + if (!device->compiler) { + result = VK_ERROR_INITIALIZATION_FAILED; + goto fail; + } + + VkPipelineCacheCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; + ci.pNext = NULL; + ci.flags = 0; + ci.pInitialData = NULL; + ci.initialDataSize = 0; + VkPipelineCache pc; + result = + tu_CreatePipelineCache(tu_device_to_handle(device), &ci, NULL, &pc); + if (result != VK_SUCCESS) + goto fail; + + device->mem_cache = tu_pipeline_cache_from_handle(pc); + + *pDevice = tu_device_to_handle(device); + return VK_SUCCESS; + +fail: + for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) { + for (unsigned q = 0; q < device->queue_count[i]; q++) + tu_queue_finish(&device->queues[i][q]); + if (device->queue_count[i]) + vk_free(&device->alloc, device->queues[i]); + } + + if (device->compiler) + ralloc_free(device->compiler); + +
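+ /* Nothing else to unwind here: the queues and the compiler were already + * torn down above, so only the device allocation itself remains. */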
vk_free(&device->alloc, device); + return result; +} + +void +tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + + if (!device) + return; + + for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) { + for (unsigned q = 0; q < device->queue_count[i]; q++) + tu_queue_finish(&device->queues[i][q]); + if (device->queue_count[i]) + vk_free(&device->alloc, device->queues[i]); + } + + /* the compiler does not use pAllocator */ + ralloc_free(device->compiler); + + VkPipelineCache pc = tu_pipeline_cache_to_handle(device->mem_cache); + tu_DestroyPipelineCache(tu_device_to_handle(device), pc, NULL); + + vk_free(&device->alloc, device); +} + +VkResult +tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, + VkLayerProperties *pProperties) +{ + *pPropertyCount = 0; + return VK_SUCCESS; +} + +VkResult +tu_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, + uint32_t *pPropertyCount, + VkLayerProperties *pProperties) +{ + *pPropertyCount = 0; + return VK_SUCCESS; +} + +void +tu_GetDeviceQueue2(VkDevice _device, + const VkDeviceQueueInfo2 *pQueueInfo, + VkQueue *pQueue) +{ + TU_FROM_HANDLE(tu_device, device, _device); + struct tu_queue *queue; + + queue = + &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex]; + if (pQueueInfo->flags != queue->flags) { + /* From the Vulkan 1.1.70 spec: + * + * "The queue returned by vkGetDeviceQueue2 must have the same + * flags value from this structure as that used at device + * creation time in a VkDeviceQueueCreateInfo instance. If no + * matching flags were specified at device creation time then + * pQueue will return VK_NULL_HANDLE." + */ + *pQueue = VK_NULL_HANDLE; + return; + } + + *pQueue = tu_queue_to_handle(queue); +} + +void +tu_GetDeviceQueue(VkDevice _device, + uint32_t queueFamilyIndex, + uint32_t queueIndex, + VkQueue *pQueue) +{ + const VkDeviceQueueInfo2 info = + (VkDeviceQueueInfo2) { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2, + .queueFamilyIndex = queueFamilyIndex, + .queueIndex = queueIndex }; + + tu_GetDeviceQueue2(_device, &info, pQueue); +} + +VkResult +tu_QueueSubmit(VkQueue _queue, + uint32_t submitCount, + const VkSubmitInfo *pSubmits, + VkFence _fence) +{ + TU_FROM_HANDLE(tu_queue, queue, _queue); + + for (uint32_t i = 0; i < submitCount; ++i) { + const VkSubmitInfo *submit = pSubmits + i; + const bool last_submit = (i == submitCount - 1); + struct tu_bo_list bo_list; + tu_bo_list_init(&bo_list); + + uint32_t entry_count = 0; + for (uint32_t j = 0; j < submit->commandBufferCount; ++j) { + TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->pCommandBuffers[j]); + entry_count += cmdbuf->cs.entry_count; + } + + struct drm_msm_gem_submit_cmd cmds[entry_count]; + uint32_t entry_idx = 0; + for (uint32_t j = 0; j < submit->commandBufferCount; ++j) { + TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->pCommandBuffers[j]); + struct tu_cs *cs = &cmdbuf->cs; + for (unsigned i = 0; i < cs->entry_count; ++i, ++entry_idx) { + cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF; + cmds[entry_idx].submit_idx = + tu_bo_list_add(&bo_list, cs->entries[i].bo, + MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP); + cmds[entry_idx].submit_offset = cs->entries[i].offset; + cmds[entry_idx].size = cs->entries[i].size; + cmds[entry_idx].pad = 0; + cmds[entry_idx].nr_relocs = 0; + cmds[entry_idx].relocs = 0; + } + + tu_bo_list_merge(&bo_list, &cmdbuf->bo_list); + } + + uint32_t flags = MSM_PIPE_3D0; + if (last_submit) { + flags |= MSM_SUBMIT_FENCE_FD_OUT; + } + + struct 
drm_msm_gem_submit req = { + .flags = flags, + .queueid = queue->msm_queue_id, + .bos = (uint64_t)(uintptr_t) bo_list.bo_infos, + .nr_bos = bo_list.count, + .cmds = (uint64_t)(uintptr_t)cmds, + .nr_cmds = entry_count, + }; + + int ret = drmCommandWriteRead(queue->device->physical_device->local_fd, + DRM_MSM_GEM_SUBMIT, + &req, sizeof(req)); + if (ret) { + fprintf(stderr, "submit failed: %s\n", strerror(errno)); + abort(); + } + + tu_bo_list_destroy(&bo_list); + + if (last_submit) { + /* no need to merge fences as queue execution is serialized */ + tu_fence_update_fd(&queue->submit_fence, req.fence_fd); + } + } + + if (_fence != VK_NULL_HANDLE) { + TU_FROM_HANDLE(tu_fence, fence, _fence); + tu_fence_copy(fence, &queue->submit_fence); + } + + return VK_SUCCESS; +} + +VkResult +tu_QueueWaitIdle(VkQueue _queue) +{ + TU_FROM_HANDLE(tu_queue, queue, _queue); + + tu_fence_wait_idle(&queue->submit_fence); + + return VK_SUCCESS; +} + +VkResult +tu_DeviceWaitIdle(VkDevice _device) +{ + TU_FROM_HANDLE(tu_device, device, _device); + + for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) { + for (unsigned q = 0; q < device->queue_count[i]; q++) { + tu_QueueWaitIdle(tu_queue_to_handle(&device->queues[i][q])); + } + } + return VK_SUCCESS; +} + +VkResult +tu_EnumerateInstanceExtensionProperties(const char *pLayerName, + uint32_t *pPropertyCount, + VkExtensionProperties *pProperties) +{ + VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); + + /* We support no layers */ + if (pLayerName) + return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); + + for (int i = 0; i < TU_INSTANCE_EXTENSION_COUNT; i++) { + if (tu_supported_instance_extensions.extensions[i]) { + vk_outarray_append(&out, prop) { *prop = tu_instance_extensions[i]; } + } + } + + return vk_outarray_status(&out); +} + +VkResult +tu_EnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice, + const char *pLayerName, + uint32_t *pPropertyCount, + VkExtensionProperties *pProperties) +{ + TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); + VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); + + /* We support no layers */ + if (pLayerName) + return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); + + for (int i = 0; i < TU_DEVICE_EXTENSION_COUNT; i++) { + if (device->supported_extensions.extensions[i]) { + vk_outarray_append(&out, prop) { *prop = tu_device_extensions[i]; } + } + } + + return vk_outarray_status(&out); +} + +PFN_vkVoidFunction +tu_GetInstanceProcAddr(VkInstance _instance, const char *pName) +{ + TU_FROM_HANDLE(tu_instance, instance, _instance); + + return tu_lookup_entrypoint_checked( + pName, instance ? instance->api_version : 0, + instance ? &instance->enabled_extensions : NULL, NULL); +} + +/* The loader wants us to expose a second GetInstanceProcAddr function + * to work around certain LD_PRELOAD issues seen in apps.
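+ * (The loader resolves vk_icdGetInstanceProcAddr from the ICD shared + * object via dlsym(), so it must be an exported symbol even though it + * simply forwards to tu_GetInstanceProcAddr.)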
+ */ +PUBLIC +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName); + +PUBLIC +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName) +{ + return tu_GetInstanceProcAddr(instance, pName); +} + +PFN_vkVoidFunction +tu_GetDeviceProcAddr(VkDevice _device, const char *pName) +{ + TU_FROM_HANDLE(tu_device, device, _device); + + return tu_lookup_entrypoint_checked(pName, device->instance->api_version, + &device->instance->enabled_extensions, + &device->enabled_extensions); +} + +static VkResult +tu_alloc_memory(struct tu_device *device, + const VkMemoryAllocateInfo *pAllocateInfo, + const VkAllocationCallbacks *pAllocator, + VkDeviceMemory *pMem) +{ + struct tu_device_memory *mem; + VkResult result; + + assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); + + if (pAllocateInfo->allocationSize == 0) { + /* Apparently, this is allowed */ + *pMem = VK_NULL_HANDLE; + return VK_SUCCESS; + } + + mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (mem == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + const VkImportMemoryFdInfoKHR *fd_info = + vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR); + if (fd_info && !fd_info->handleType) + fd_info = NULL; + + if (fd_info) { + assert(fd_info->handleType == + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || + fd_info->handleType == + VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); + + /* + * TODO Importing the same fd twice gives us the same handle without + * reference counting. We need to maintain a per-instance handle-to-bo + * table and add reference count to tu_bo. + */ + result = tu_bo_init_dmabuf(device, &mem->bo, + pAllocateInfo->allocationSize, fd_info->fd); + if (result == VK_SUCCESS) { + /* take ownership and close the fd */ + close(fd_info->fd); + } + } else { + result = + tu_bo_init_new(device, &mem->bo, pAllocateInfo->allocationSize); + } + + if (result != VK_SUCCESS) { + vk_free2(&device->alloc, pAllocator, mem); + return result; + } + + mem->size = pAllocateInfo->allocationSize; + mem->type_index = pAllocateInfo->memoryTypeIndex; + + mem->map = NULL; + mem->user_ptr = NULL; + + *pMem = tu_device_memory_to_handle(mem); + + return VK_SUCCESS; +} + +VkResult +tu_AllocateMemory(VkDevice _device, + const VkMemoryAllocateInfo *pAllocateInfo, + const VkAllocationCallbacks *pAllocator, + VkDeviceMemory *pMem) +{ + TU_FROM_HANDLE(tu_device, device, _device); + return tu_alloc_memory(device, pAllocateInfo, pAllocator, pMem); +} + +void +tu_FreeMemory(VkDevice _device, + VkDeviceMemory _mem, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_device_memory, mem, _mem); + + if (mem == NULL) + return; + + tu_bo_finish(device, &mem->bo); + vk_free2(&device->alloc, pAllocator, mem); +} + +VkResult +tu_MapMemory(VkDevice _device, + VkDeviceMemory _memory, + VkDeviceSize offset, + VkDeviceSize size, + VkMemoryMapFlags flags, + void **ppData) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_device_memory, mem, _memory); + VkResult result; + + if (mem == NULL) { + *ppData = NULL; + return VK_SUCCESS; + } + + if (mem->user_ptr) { + *ppData = mem->user_ptr; + } else if (!mem->map) { + result = tu_bo_map(device, &mem->bo); + if (result != VK_SUCCESS) + return result; + *ppData = mem->map = mem->bo.map; + } else + *ppData = mem->map; + + if (*ppData) { + *ppData += 
offset; + return VK_SUCCESS; + } + + return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED); +} + +void +tu_UnmapMemory(VkDevice _device, VkDeviceMemory _memory) +{ + /* I do not see any unmapping done by the freedreno Gallium driver. */ +} + +VkResult +tu_FlushMappedMemoryRanges(VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) +{ + return VK_SUCCESS; +} + +VkResult +tu_InvalidateMappedMemoryRanges(VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) +{ + return VK_SUCCESS; +} + +void +tu_GetBufferMemoryRequirements(VkDevice _device, + VkBuffer _buffer, + VkMemoryRequirements *pMemoryRequirements) +{ + TU_FROM_HANDLE(tu_buffer, buffer, _buffer); + + pMemoryRequirements->memoryTypeBits = 1; + pMemoryRequirements->alignment = 16; + pMemoryRequirements->size = + align64(buffer->size, pMemoryRequirements->alignment); +} + +void +tu_GetBufferMemoryRequirements2( + VkDevice device, + const VkBufferMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) +{ + tu_GetBufferMemoryRequirements(device, pInfo->buffer, + &pMemoryRequirements->memoryRequirements); +} + +void +tu_GetImageMemoryRequirements(VkDevice _device, + VkImage _image, + VkMemoryRequirements *pMemoryRequirements) +{ + TU_FROM_HANDLE(tu_image, image, _image); + + pMemoryRequirements->memoryTypeBits = 1; + pMemoryRequirements->size = image->size; + pMemoryRequirements->alignment = image->alignment; +} + +void +tu_GetImageMemoryRequirements2(VkDevice device, + const VkImageMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) +{ + tu_GetImageMemoryRequirements(device, pInfo->image, + &pMemoryRequirements->memoryRequirements); +} + +void +tu_GetImageSparseMemoryRequirements( + VkDevice device, + VkImage image, + uint32_t *pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements *pSparseMemoryRequirements) +{ + tu_stub(); +} + +void +tu_GetImageSparseMemoryRequirements2( + VkDevice device, + const VkImageSparseMemoryRequirementsInfo2 *pInfo, + uint32_t *pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements) +{ + tu_stub(); +} + +void +tu_GetDeviceMemoryCommitment(VkDevice device, + VkDeviceMemory memory, + VkDeviceSize *pCommittedMemoryInBytes) +{ + *pCommittedMemoryInBytes = 0; +} + +VkResult +tu_BindBufferMemory2(VkDevice device, + uint32_t bindInfoCount, + const VkBindBufferMemoryInfo *pBindInfos) +{ + for (uint32_t i = 0; i < bindInfoCount; ++i) { + TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory); + TU_FROM_HANDLE(tu_buffer, buffer, pBindInfos[i].buffer); + + if (mem) { + buffer->bo = &mem->bo; + buffer->bo_offset = pBindInfos[i].memoryOffset; + } else { + buffer->bo = NULL; + } + } + return VK_SUCCESS; +} + +VkResult +tu_BindBufferMemory(VkDevice device, + VkBuffer buffer, + VkDeviceMemory memory, + VkDeviceSize memoryOffset) +{ + const VkBindBufferMemoryInfo info = { + .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, + .buffer = buffer, + .memory = memory, + .memoryOffset = memoryOffset + }; + + return tu_BindBufferMemory2(device, 1, &info); +} + +VkResult +tu_BindImageMemory2(VkDevice device, + uint32_t bindInfoCount, + const VkBindImageMemoryInfo *pBindInfos) +{ + for (uint32_t i = 0; i < bindInfoCount; ++i) { + TU_FROM_HANDLE(tu_image, image, pBindInfos[i].image); + TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory); + + if (mem) { + image->bo = &mem->bo; + image->bo_offset = pBindInfos[i].memoryOffset; + } else { + image->bo = 
NULL;
+         image->bo_offset = 0;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+VkResult
+tu_BindImageMemory(VkDevice device,
+                   VkImage image,
+                   VkDeviceMemory memory,
+                   VkDeviceSize memoryOffset)
+{
+   const VkBindImageMemoryInfo info = {
+      .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
+      .image = image,
+      .memory = memory,
+      .memoryOffset = memoryOffset
+   };
+
+   return tu_BindImageMemory2(device, 1, &info);
+}
+
+VkResult
+tu_QueueBindSparse(VkQueue _queue,
+                   uint32_t bindInfoCount,
+                   const VkBindSparseInfo *pBindInfo,
+                   VkFence _fence)
+{
+   return VK_SUCCESS;
+}
+
+// Queue semaphore functions
+
+VkResult
+tu_CreateSemaphore(VkDevice _device,
+                   const VkSemaphoreCreateInfo *pCreateInfo,
+                   const VkAllocationCallbacks *pAllocator,
+                   VkSemaphore *pSemaphore)
+{
+   TU_FROM_HANDLE(tu_device, device, _device);
+
+   struct tu_semaphore *sem =
+      vk_alloc2(&device->alloc, pAllocator, sizeof(*sem), 8,
+                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!sem)
+      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   *pSemaphore = tu_semaphore_to_handle(sem);
+   return VK_SUCCESS;
+}
+
+void
+tu_DestroySemaphore(VkDevice _device,
+                    VkSemaphore _semaphore,
+                    const VkAllocationCallbacks *pAllocator)
+{
+   TU_FROM_HANDLE(tu_device, device, _device);
+   TU_FROM_HANDLE(tu_semaphore, sem, _semaphore);
+   if (!_semaphore)
+      return;
+
+   vk_free2(&device->alloc, pAllocator, sem);
+}
+
+/* Events are not backed by device memory in this stub: nothing here sets up
+ * event->map, and tu_GetEventStatus()/tu_SetEvent()/tu_ResetEvent() below
+ * assume it already points at mapped storage.
+ */
+VkResult
+tu_CreateEvent(VkDevice _device,
+               const VkEventCreateInfo *pCreateInfo,
+               const VkAllocationCallbacks *pAllocator,
+               VkEvent *pEvent)
+{
+   TU_FROM_HANDLE(tu_device, device, _device);
+   struct tu_event *event =
+      vk_alloc2(&device->alloc, pAllocator, sizeof(*event), 8,
+                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+   if (!event)
+      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   *pEvent = tu_event_to_handle(event);
+
+   return VK_SUCCESS;
+}
+
+void
+tu_DestroyEvent(VkDevice _device,
+                VkEvent _event,
+                const VkAllocationCallbacks *pAllocator)
+{
+   TU_FROM_HANDLE(tu_device, device, _device);
+   TU_FROM_HANDLE(tu_event, event, _event);
+
+   if (!event)
+      return;
+   vk_free2(&device->alloc, pAllocator, event);
+}
+
+VkResult
+tu_GetEventStatus(VkDevice _device, VkEvent _event)
+{
+   TU_FROM_HANDLE(tu_event, event, _event);
+
+   if (*event->map == 1)
+      return VK_EVENT_SET;
+   return VK_EVENT_RESET;
+}
+
+VkResult
+tu_SetEvent(VkDevice _device, VkEvent _event)
+{
+   TU_FROM_HANDLE(tu_event, event, _event);
+   *event->map = 1;
+
+   return VK_SUCCESS;
+}
+
+VkResult
+tu_ResetEvent(VkDevice _device, VkEvent _event)
+{
+   TU_FROM_HANDLE(tu_event, event, _event);
+   *event->map = 0;
+
+   return VK_SUCCESS;
+}
+
+VkResult
+tu_CreateBuffer(VkDevice _device,
+                const VkBufferCreateInfo *pCreateInfo,
+                const VkAllocationCallbacks *pAllocator,
+                VkBuffer *pBuffer)
+{
+   TU_FROM_HANDLE(tu_device, device, _device);
+   struct tu_buffer *buffer;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
+
+   buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
+                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (buffer == NULL)
+      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   buffer->size = pCreateInfo->size;
+   buffer->usage = pCreateInfo->usage;
+   buffer->flags = pCreateInfo->flags;
+
+   *pBuffer = tu_buffer_to_handle(buffer);
+
+   return VK_SUCCESS;
+}
+
+void
+tu_DestroyBuffer(VkDevice _device,
+                 VkBuffer _buffer,
+                 const VkAllocationCallbacks *pAllocator)
+{
+   TU_FROM_HANDLE(tu_device, device, _device);
+   TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
+
+   if (!buffer)
+      return;
+
+   vk_free2(&device->alloc,
pAllocator, buffer); +} + +static uint32_t +tu_surface_max_layer_count(struct tu_image_view *iview) +{ + return iview->type == VK_IMAGE_VIEW_TYPE_3D + ? iview->extent.depth + : (iview->base_layer + iview->layer_count); +} + +VkResult +tu_CreateFramebuffer(VkDevice _device, + const VkFramebufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFramebuffer *pFramebuffer) +{ + TU_FROM_HANDLE(tu_device, device, _device); + struct tu_framebuffer *framebuffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); + + size_t size = sizeof(*framebuffer) + sizeof(struct tu_attachment_info) * + pCreateInfo->attachmentCount; + framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (framebuffer == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + framebuffer->attachment_count = pCreateInfo->attachmentCount; + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + VkImageView _iview = pCreateInfo->pAttachments[i]; + struct tu_image_view *iview = tu_image_view_from_handle(_iview); + framebuffer->attachments[i].attachment = iview; + + framebuffer->width = MIN2(framebuffer->width, iview->extent.width); + framebuffer->height = MIN2(framebuffer->height, iview->extent.height); + framebuffer->layers = + MIN2(framebuffer->layers, tu_surface_max_layer_count(iview)); + } + + *pFramebuffer = tu_framebuffer_to_handle(framebuffer); + return VK_SUCCESS; +} + +void +tu_DestroyFramebuffer(VkDevice _device, + VkFramebuffer _fb, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_framebuffer, fb, _fb); + + if (!fb) + return; + vk_free2(&device->alloc, pAllocator, fb); +} + +static void +tu_init_sampler(struct tu_device *device, + struct tu_sampler *sampler, + const VkSamplerCreateInfo *pCreateInfo) +{ +} + +VkResult +tu_CreateSampler(VkDevice _device, + const VkSamplerCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSampler *pSampler) +{ + TU_FROM_HANDLE(tu_device, device, _device); + struct tu_sampler *sampler; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!sampler) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + tu_init_sampler(device, sampler, pCreateInfo); + *pSampler = tu_sampler_to_handle(sampler); + + return VK_SUCCESS; +} + +void +tu_DestroySampler(VkDevice _device, + VkSampler _sampler, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_sampler, sampler, _sampler); + + if (!sampler) + return; + vk_free2(&device->alloc, pAllocator, sampler); +} + +/* vk_icd.h does not declare this function, so we declare it here to + * suppress Wmissing-prototypes. + */ +PUBLIC VKAPI_ATTR VkResult VKAPI_CALL +vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion); + +PUBLIC VKAPI_ATTR VkResult VKAPI_CALL +vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion) +{ + /* For the full details on loader interface versioning, see + * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>. 
+ * What follows is a condensed summary, to help you navigate the large and
+ * confusing official doc.
+ *
+ *   - Loader interface v0 is incompatible with later versions. We don't
+ *     support it.
+ *
+ *   - In loader interface v1:
+ *       - The first ICD entrypoint called by the loader is
+ *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
+ *         entrypoint.
+ *       - The ICD must statically expose no other Vulkan symbol unless it
+ *         is linked with -Bsymbolic.
+ *       - Each dispatchable Vulkan handle created by the ICD must be
+ *         a pointer to a struct whose first member is VK_LOADER_DATA. The
+ *         ICD must initialize VK_LOADER_DATA.loadMagic to
+ *         ICD_LOADER_MAGIC.
+ *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
+ *         vkDestroySurfaceKHR(). The ICD must be capable of working with
+ *         such loader-managed surfaces.
+ *
+ *   - Loader interface v2 differs from v1 in:
+ *       - The first ICD entrypoint called by the loader is
+ *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
+ *         statically expose this entrypoint.
+ *
+ *   - Loader interface v3 differs from v2 in:
+ *       - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
+ *         vkDestroySurfaceKHR(), and any other API that uses VkSurfaceKHR,
+ *         because the loader no longer does so.
+ */
+   *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
+   return VK_SUCCESS;
+}
+
+VkResult
+tu_GetMemoryFdKHR(VkDevice _device,
+                  const VkMemoryGetFdInfoKHR *pGetFdInfo,
+                  int *pFd)
+{
+   TU_FROM_HANDLE(tu_device, device, _device);
+   TU_FROM_HANDLE(tu_device_memory, memory, pGetFdInfo->memory);
+
+   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
+
+   /* At the moment, we support only the below handle types. */
+   assert(pGetFdInfo->handleType ==
+             VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
+          pGetFdInfo->handleType ==
+             VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
+
+   int prime_fd = tu_bo_export_dmabuf(device, &memory->bo);
+   if (prime_fd < 0)
+      return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+   *pFd = prime_fd;
+   return VK_SUCCESS;
+}
+
+VkResult
+tu_GetMemoryFdPropertiesKHR(VkDevice _device,
+                            VkExternalMemoryHandleTypeFlagBits handleType,
+                            int fd,
+                            VkMemoryFdPropertiesKHR *pMemoryFdProperties)
+{
+   assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
+   pMemoryFdProperties->memoryTypeBits = 1;
+   return VK_SUCCESS;
+}
+
+void
+tu_GetPhysicalDeviceExternalSemaphoreProperties(
+   VkPhysicalDevice physicalDevice,
+   const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
+   VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
+{
+   pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
+   pExternalSemaphoreProperties->compatibleHandleTypes = 0;
+   pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
+}
+
+void
+tu_GetPhysicalDeviceExternalFenceProperties(
+   VkPhysicalDevice physicalDevice,
+   const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
+   VkExternalFenceProperties *pExternalFenceProperties)
+{
+   pExternalFenceProperties->exportFromImportedHandleTypes = 0;
+   pExternalFenceProperties->compatibleHandleTypes = 0;
+   pExternalFenceProperties->externalFenceFeatures = 0;
+}
+
+VkResult
+tu_CreateDebugReportCallbackEXT(
+   VkInstance _instance,
+   const VkDebugReportCallbackCreateInfoEXT *pCreateInfo,
+   const VkAllocationCallbacks *pAllocator,
+   VkDebugReportCallbackEXT *pCallback)
+{
+   TU_FROM_HANDLE(tu_instance, instance, _instance);
+   return vk_create_debug_report_callback(&instance->debug_report_callbacks,
+                                          pCreateInfo, pAllocator,
&instance->alloc, pCallback); +} + +void +tu_DestroyDebugReportCallbackEXT(VkInstance _instance, + VkDebugReportCallbackEXT _callback, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_instance, instance, _instance); + vk_destroy_debug_report_callback(&instance->debug_report_callbacks, + _callback, pAllocator, &instance->alloc); +} + +void +tu_DebugReportMessageEXT(VkInstance _instance, + VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT objectType, + uint64_t object, + size_t location, + int32_t messageCode, + const char *pLayerPrefix, + const char *pMessage) +{ + TU_FROM_HANDLE(tu_instance, instance, _instance); + vk_debug_report(&instance->debug_report_callbacks, flags, objectType, + object, location, messageCode, pLayerPrefix, pMessage); +} + +void +tu_GetDeviceGroupPeerMemoryFeatures( + VkDevice device, + uint32_t heapIndex, + uint32_t localDeviceIndex, + uint32_t remoteDeviceIndex, + VkPeerMemoryFeatureFlags *pPeerMemoryFeatures) +{ + assert(localDeviceIndex == remoteDeviceIndex); + + *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | + VK_PEER_MEMORY_FEATURE_COPY_DST_BIT | + VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | + VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT; +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_drm.c b/lib/mesa/src/freedreno/vulkan/tu_drm.c new file mode 100644 index 000000000..9b2e6f788 --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_drm.c @@ -0,0 +1,194 @@ +/* + * Copyright © 2018 Google, Inc. + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "tu_private.h" + +#include <errno.h> +#include <fcntl.h> +#include <stdint.h> +#include <sys/ioctl.h> +#include <xf86drm.h> + +#include "drm-uapi/msm_drm.h" + +static int +tu_drm_get_param(const struct tu_physical_device *dev, + uint32_t param, + uint64_t *value) +{ + /* Technically this requires a pipe, but the kernel only supports one pipe + * anyway at the time of writing and most of these are clearly pipe + * independent. 
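+    * (MSM_PARAM_GPU_ID and MSM_PARAM_GMEM_SIZE, the only parameters queried
+    * in this file, describe the device as a whole rather than any one pipe.)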
    */
+   struct drm_msm_param req = {
+      .pipe = MSM_PIPE_3D0,
+      .param = param,
+   };
+
+   int ret = drmCommandWriteRead(dev->local_fd, DRM_MSM_GET_PARAM, &req,
+                                 sizeof(req));
+   if (ret)
+      return ret;
+
+   *value = req.value;
+
+   return 0;
+}
+
+int
+tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id)
+{
+   uint64_t value;
+   int ret = tu_drm_get_param(dev, MSM_PARAM_GPU_ID, &value);
+   if (ret)
+      return ret;
+
+   *id = value;
+   return 0;
+}
+
+int
+tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size)
+{
+   uint64_t value;
+   int ret = tu_drm_get_param(dev, MSM_PARAM_GMEM_SIZE, &value);
+   if (ret)
+      return ret;
+
+   *size = value;
+   return 0;
+}
+
+int
+tu_drm_submitqueue_new(const struct tu_device *dev,
+                       int priority,
+                       uint32_t *queue_id)
+{
+   struct drm_msm_submitqueue req = {
+      .flags = 0,
+      .prio = priority,
+   };
+
+   int ret = drmCommandWriteRead(dev->physical_device->local_fd,
+                                 DRM_MSM_SUBMITQUEUE_NEW, &req, sizeof(req));
+   if (ret)
+      return ret;
+
+   *queue_id = req.id;
+   return 0;
+}
+
+void
+tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id)
+{
+   drmCommandWrite(dev->physical_device->local_fd, DRM_MSM_SUBMITQUEUE_CLOSE,
+                   &queue_id, sizeof(uint32_t));
+}
+
+/**
+ * Return gem handle on success. Return 0 on failure.
+ */
+uint32_t
+tu_gem_new(const struct tu_device *dev, uint64_t size, uint32_t flags)
+{
+   struct drm_msm_gem_new req = {
+      .size = size,
+      .flags = flags,
+   };
+
+   int ret = drmCommandWriteRead(dev->physical_device->local_fd,
+                                 DRM_MSM_GEM_NEW, &req, sizeof(req));
+   if (ret)
+      return 0;
+
+   return req.handle;
+}
+
+uint32_t
+tu_gem_import_dmabuf(const struct tu_device *dev, int prime_fd, uint64_t size)
+{
+   /* lseek() to get the real size */
+   off_t real_size = lseek(prime_fd, 0, SEEK_END);
+   lseek(prime_fd, 0, SEEK_SET);
+   if (real_size < 0 || (uint64_t) real_size < size)
+      return 0;
+
+   uint32_t gem_handle;
+   int ret = drmPrimeFDToHandle(dev->physical_device->local_fd, prime_fd,
+                                &gem_handle);
+   if (ret)
+      return 0;
+
+   return gem_handle;
+}
+
+int
+tu_gem_export_dmabuf(const struct tu_device *dev, uint32_t gem_handle)
+{
+   int prime_fd;
+   int ret = drmPrimeHandleToFD(dev->physical_device->local_fd, gem_handle,
+                                DRM_CLOEXEC, &prime_fd);
+
+   return ret == 0 ? prime_fd : -1;
+}
+
+void
+tu_gem_close(const struct tu_device *dev, uint32_t gem_handle)
+{
+   struct drm_gem_close req = {
+      .handle = gem_handle,
+   };
+
+   drmIoctl(dev->physical_device->local_fd, DRM_IOCTL_GEM_CLOSE, &req);
+}
+
+/** Return UINT64_MAX on error. */
+static uint64_t
+tu_gem_info(const struct tu_device *dev, uint32_t gem_handle, uint32_t info)
+{
+   struct drm_msm_gem_info req = {
+      .handle = gem_handle,
+      .info = info,
+   };
+
+   int ret = drmCommandWriteRead(dev->physical_device->local_fd,
+                                 DRM_MSM_GEM_INFO, &req, sizeof(req));
+   /* drmCommandWriteRead() returns 0 on success or a negative errno value,
+    * so check for any negative value rather than -1 only.
+    */
+   if (ret < 0)
+      return UINT64_MAX;
+
+   return req.value;
+}
+
+/** Return UINT64_MAX on error. */
+uint64_t
+tu_gem_info_offset(const struct tu_device *dev, uint32_t gem_handle)
+{
+   return tu_gem_info(dev, gem_handle, MSM_INFO_GET_OFFSET);
+}
+
+/** Return UINT64_MAX on error.
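+ * The IOVA is the GPU virtual address the kernel assigned to the buffer.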
 */
+uint64_t
+tu_gem_info_iova(const struct tu_device *dev, uint32_t gem_handle)
+{
+   return tu_gem_info(dev, gem_handle, MSM_INFO_GET_IOVA);
+}
diff --git a/lib/mesa/src/freedreno/vulkan/tu_entrypoints_gen.py b/lib/mesa/src/freedreno/vulkan/tu_entrypoints_gen.py
new file mode 100644
index 000000000..facbb69fa
--- /dev/null
+++ b/lib/mesa/src/freedreno/vulkan/tu_entrypoints_gen.py
@@ -0,0 +1,510 @@
+# coding=utf-8
+#
+# Copyright © 2015, 2017 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+
+import argparse
+import copy
+import functools
+import math
+import os
+import xml.etree.cElementTree as et
+
+from collections import OrderedDict, namedtuple
+from mako.template import Template
+
+from tu_extensions import VkVersion, MAX_API_VERSION, EXTENSIONS
+
+# We generate a static hash table for entry point lookup
+# (vkGetInstanceProcAddr() and vkGetDeviceProcAddr()). We use a linear
+# congruential generator for our hash function and a power-of-two size
+# table. The prime numbers are determined experimentally.
+
+# We currently don't use layers in tu, but we keep the machinery around
+# anyway, as anv does, so it can be used for device groups.
+LAYERS = [
+    'tu'
+]
+
+TEMPLATE_H = Template("""\
+/* This file generated from ${filename}, don't edit directly.
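+ *
+ * The dispatch table below overlays named PFN_vk* members on a flat
+ * entrypoints[] array, so an entry point can be reached either by name
+ * or by the index stored in the generated string map.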
*/ + +struct tu_dispatch_table { + union { + void *entrypoints[${len(entrypoints)}]; + struct { + % for e in entrypoints: + % if e.guard is not None: +#ifdef ${e.guard} + PFN_${e.name} ${e.name}; +#else + void *${e.name}; +# endif + % else: + PFN_${e.name} ${e.name}; + % endif + % endfor + }; + }; +}; + +% for e in entrypoints: + % if e.alias: + <% continue %> + % endif + % if e.guard is not None: +#ifdef ${e.guard} + % endif + % for layer in LAYERS: + ${e.return_type} ${e.prefixed_name(layer)}(${e.decl_params()}); + % endfor + % if e.guard is not None: +#endif // ${e.guard} + % endif +% endfor +""", output_encoding='utf-8') + +TEMPLATE_C = Template(u"""\ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* This file generated from ${filename}, don't edit directly. */ + +#include "tu_private.h" + +struct string_map_entry { + uint32_t name; + uint32_t hash; + uint32_t num; +}; + +/* We use a big string constant to avoid lots of relocations from the entry + * point table to lots of little strings. The entries in the entry point table + * store the index into this big string. + */ + +static const char strings[] = +% for s in strmap.sorted_strings: + "${s.string}\\0" +% endfor +; + +static const struct string_map_entry string_map_entries[] = { +% for s in strmap.sorted_strings: + { ${s.offset}, ${'{:0=#8x}'.format(s.hash)}, ${s.num} }, /* ${s.string} */ +% endfor +}; + +/* Hash table stats: + * size ${len(strmap.sorted_strings)} entries + * collisions entries: +% for i in range(10): + * ${i}${'+' if i == 9 else ' '} ${strmap.collisions[i]} +% endfor + */ + +#define none 0xffff +static const uint16_t string_map[${strmap.hash_size}] = { +% for e in strmap.mapping: + ${ '{:0=#6x}'.format(e) if e >= 0 else 'none' }, +% endfor +}; + +/* Weak aliases for all potential implementations. These will resolve to + * NULL if they're not defined, which lets the resolve_entrypoint() function + * either pick the correct entry point. 
+ */ + +% for layer in LAYERS: + % for e in entrypoints: + % if e.alias: + <% continue %> + % endif + % if e.guard is not None: +#ifdef ${e.guard} + % endif + ${e.return_type} ${e.prefixed_name(layer)}(${e.decl_params()}) __attribute__ ((weak)); + % if e.guard is not None: +#endif // ${e.guard} + % endif + % endfor + + const struct tu_dispatch_table ${layer}_layer = { + % for e in entrypoints: + % if e.guard is not None: +#ifdef ${e.guard} + % endif + .${e.name} = ${e.prefixed_name(layer)}, + % if e.guard is not None: +#endif // ${e.guard} + % endif + % endfor + }; +% endfor + +static void * __attribute__ ((noinline)) +tu_resolve_entrypoint(uint32_t index) +{ + return tu_layer.entrypoints[index]; +} + +/** Return true if the core version or extension in which the given entrypoint + * is defined is enabled. + * + * If instance is NULL, we only allow the 3 commands explicitly allowed by the vk + * spec. + * + * If device is NULL, all device extensions are considered enabled. + */ +static bool +tu_entrypoint_is_enabled(int index, uint32_t core_version, + const struct tu_instance_extension_table *instance, + const struct tu_device_extension_table *device) +{ + switch (index) { +% for e in entrypoints: + case ${e.num}: + % if not e.device_command: + if (device) return false; + % endif + % if e.name == 'vkCreateInstance' or e.name == 'vkEnumerateInstanceExtensionProperties' or e.name == 'vkEnumerateInstanceLayerProperties' or e.name == 'vkEnumerateInstanceVersion': + return !device; + % elif e.core_version: + return instance && ${e.core_version.c_vk_version()} <= core_version; + % elif e.extensions: + % for ext in e.extensions: + % if ext.type == 'instance': + if (instance && instance->${ext.name[3:]}) return true; + % else: + if (instance && (!device || device->${ext.name[3:]})) return true; + % endif + %endfor + return false; + % else: + return instance; + % endif +% endfor + default: + return false; + } +} + +static int +tu_lookup_entrypoint(const char *name) +{ + static const uint32_t prime_factor = ${strmap.prime_factor}; + static const uint32_t prime_step = ${strmap.prime_step}; + const struct string_map_entry *e; + uint32_t hash, h; + uint16_t i; + const char *p; + + hash = 0; + for (p = name; *p; p++) + hash = hash * prime_factor + *p; + + h = hash; + while (1) { + i = string_map[h & ${strmap.hash_mask}]; + if (i == none) + return -1; + e = &string_map_entries[i]; + if (e->hash == hash && strcmp(name, strings + e->name) == 0) + return e->num; + h += prime_step; + } + + return -1; +} + +void * +tu_lookup_entrypoint_unchecked(const char *name) +{ + int index = tu_lookup_entrypoint(name); + if (index < 0) + return NULL; + return tu_resolve_entrypoint(index); +} + +void * +tu_lookup_entrypoint_checked(const char *name, + uint32_t core_version, + const struct tu_instance_extension_table *instance, + const struct tu_device_extension_table *device) +{ + int index = tu_lookup_entrypoint(name); + if (index < 0 || !tu_entrypoint_is_enabled(index, core_version, instance, device)) + return NULL; + return tu_resolve_entrypoint(index); +}""", output_encoding='utf-8') + +U32_MASK = 2**32 - 1 + +PRIME_FACTOR = 5024183 +PRIME_STEP = 19 + +def round_to_pow2(x): + return 2**int(math.ceil(math.log(x, 2))) + +class StringIntMapEntry(object): + def __init__(self, string, num): + self.string = string + self.num = num + + # Calculate the same hash value that we will calculate in C. 
+ h = 0 + for c in string: + h = ((h * PRIME_FACTOR) + ord(c)) & U32_MASK + self.hash = h + + self.offset = None + +class StringIntMap(object): + def __init__(self): + self.baked = False + self.strings = dict() + + def add_string(self, string, num): + assert not self.baked + assert string not in self.strings + assert num >= 0 and num < 2**31 + self.strings[string] = StringIntMapEntry(string, num) + + def bake(self): + self.sorted_strings = \ + sorted(self.strings.values(), key=lambda x: x.string) + offset = 0 + for entry in self.sorted_strings: + entry.offset = offset + offset += len(entry.string) + 1 + + # Save off some values that we'll need in C + self.hash_size = round_to_pow2(len(self.strings) * 1.25) + self.hash_mask = self.hash_size - 1 + self.prime_factor = PRIME_FACTOR + self.prime_step = PRIME_STEP + + self.mapping = [-1] * self.hash_size + self.collisions = [0] * 10 + for idx, s in enumerate(self.sorted_strings): + level = 0 + h = s.hash + while self.mapping[h & self.hash_mask] >= 0: + h = h + PRIME_STEP + level = level + 1 + self.collisions[min(level, 9)] += 1 + self.mapping[h & self.hash_mask] = idx + +EntrypointParam = namedtuple('EntrypointParam', 'type name decl') + +class EntrypointBase(object): + def __init__(self, name): + self.name = name + self.alias = None + self.guard = None + self.enabled = False + self.num = None + # Extensions which require this entrypoint + self.core_version = None + self.extensions = [] + +class Entrypoint(EntrypointBase): + def __init__(self, name, return_type, params, guard = None): + super(Entrypoint, self).__init__(name) + self.return_type = return_type + self.params = params + self.guard = guard + self.device_command = len(params) > 0 and (params[0].type == 'VkDevice' or params[0].type == 'VkQueue' or params[0].type == 'VkCommandBuffer') + + def prefixed_name(self, prefix): + assert self.name.startswith('vk') + return prefix + '_' + self.name[2:] + + def decl_params(self): + return ', '.join(p.decl for p in self.params) + + def call_params(self): + return ', '.join(p.name for p in self.params) + +class EntrypointAlias(EntrypointBase): + def __init__(self, name, entrypoint): + super(EntrypointAlias, self).__init__(name) + self.alias = entrypoint + self.device_command = entrypoint.device_command + + def prefixed_name(self, prefix): + return self.alias.prefixed_name(prefix) + +def get_entrypoints(doc, entrypoints_to_defines, start_index): + """Extract the entry points from the registry.""" + entrypoints = OrderedDict() + + for command in doc.findall('./commands/command'): + if 'alias' in command.attrib: + alias = command.attrib['name'] + target = command.attrib['alias'] + entrypoints[alias] = EntrypointAlias(alias, entrypoints[target]) + else: + name = command.find('./proto/name').text + ret_type = command.find('./proto/type').text + params = [EntrypointParam( + type = p.find('./type').text, + name = p.find('./name').text, + decl = ''.join(p.itertext()) + ) for p in command.findall('./param')] + guard = entrypoints_to_defines.get(name) + # They really need to be unique + assert name not in entrypoints + entrypoints[name] = Entrypoint(name, ret_type, params, guard) + + for feature in doc.findall('./feature'): + assert feature.attrib['api'] == 'vulkan' + version = VkVersion(feature.attrib['number']) + if version > MAX_API_VERSION: + continue + + for command in feature.findall('./require/command'): + e = entrypoints[command.attrib['name']] + e.enabled = True + assert e.core_version is None + e.core_version = version + + supported_exts = 
dict((ext.name, ext) for ext in EXTENSIONS) + for extension in doc.findall('.extensions/extension'): + ext_name = extension.attrib['name'] + if ext_name not in supported_exts: + continue + + ext = supported_exts[ext_name] + ext.type = extension.attrib['type'] + + for command in extension.findall('./require/command'): + e = entrypoints[command.attrib['name']] + e.enabled = True + assert e.core_version is None + e.extensions.append(ext) + + # if the base command is not supported by the driver yet, don't alias aliases + for e in entrypoints.values(): + if e.alias and not e.alias.enabled: + e_clone = copy.deepcopy(e.alias) + e_clone.enabled = True + e_clone.name = e.name + entrypoints[e.name] = e_clone + + return [e for e in entrypoints.values() if e.enabled] + + +def get_entrypoints_defines(doc): + """Maps entry points to extension defines.""" + entrypoints_to_defines = {} + + for extension in doc.findall('./extensions/extension[@protect]'): + define = extension.attrib['protect'] + + for entrypoint in extension.findall('./require/command'): + fullname = entrypoint.attrib['name'] + entrypoints_to_defines[fullname] = define + + platform_define = {} + for platform in doc.findall('./platforms/platform'): + name = platform.attrib['name'] + define = platform.attrib['protect'] + platform_define[name] = define + + for extension in doc.findall('./extensions/extension[@platform]'): + platform = extension.attrib['platform'] + define = platform_define[platform] + + for entrypoint in extension.findall('./require/command'): + fullname = entrypoint.attrib['name'] + entrypoints_to_defines[fullname] = define + + return entrypoints_to_defines + + +def gen_code(entrypoints): + """Generate the C code.""" + strmap = StringIntMap() + for e in entrypoints: + strmap.add_string(e.name, e.num) + strmap.bake() + + return TEMPLATE_C.render(entrypoints=entrypoints, + LAYERS=LAYERS, + strmap=strmap, + filename=os.path.basename(__file__)) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--outdir', help='Where to write the files.', + required=True) + parser.add_argument('--xml', + help='Vulkan API XML file.', + required=True, + action='append', + dest='xml_files') + args = parser.parse_args() + + entrypoints = [] + + for filename in args.xml_files: + doc = et.parse(filename) + entrypoints += get_entrypoints(doc, get_entrypoints_defines(doc), + start_index=len(entrypoints)) + + for num, e in enumerate(entrypoints): + e.num = num + + # For outputting entrypoints.h we generate a tu_EntryPoint() prototype + # per entry point. 
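+    # tu_entrypoints.c additionally gets the string map that gen_code()
+    # builds from the entry point numbers assigned above.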
+ with open(os.path.join(args.outdir, 'tu_entrypoints.h'), 'wb') as f: + f.write(TEMPLATE_H.render(entrypoints=entrypoints, + LAYERS=LAYERS, + filename=os.path.basename(__file__))) + with open(os.path.join(args.outdir, 'tu_entrypoints.c'), 'wb') as f: + f.write(gen_code(entrypoints)) + + +if __name__ == '__main__': + main() diff --git a/lib/mesa/src/freedreno/vulkan/tu_extensions.py b/lib/mesa/src/freedreno/vulkan/tu_extensions.py new file mode 100644 index 000000000..0a45b859e --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_extensions.py @@ -0,0 +1,279 @@ +COPYRIGHT = """\ +/* + * Copyright 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +""" + +import argparse +import copy +import re +import xml.etree.cElementTree as et + +from mako.template import Template + +MAX_API_VERSION = '1.1.82' + +class Extension: + def __init__(self, name, ext_version, enable): + self.name = name + self.ext_version = int(ext_version) + if enable is True: + self.enable = 'true'; + elif enable is False: + self.enable = 'false'; + else: + self.enable = enable; + +# On Android, we disable all surface and swapchain extensions. Android's Vulkan +# loader implements VK_KHR_surface and VK_KHR_swapchain, and applications +# cannot access the driver's implementation. Moreoever, if the driver exposes +# the those extension strings, then tests dEQP-VK.api.info.instance.extensions +# and dEQP-VK.api.info.device fail due to the duplicated strings. 
+EXTENSIONS = [ + Extension('VK_KHR_bind_memory2', 1, True), + Extension('VK_KHR_create_renderpass2', 1, True), + Extension('VK_KHR_dedicated_allocation', 1, True), + Extension('VK_KHR_get_display_properties2', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), + Extension('VK_KHR_get_memory_requirements2', 1, True), + Extension('VK_KHR_get_physical_device_properties2', 1, True), + Extension('VK_KHR_get_surface_capabilities2', 1, 'TU_HAS_SURFACE'), + Extension('VK_KHR_maintenance1', 1, True), + Extension('VK_KHR_maintenance2', 1, True), + Extension('VK_KHR_maintenance3', 1, True), + Extension('VK_KHR_surface', 25, 'TU_HAS_SURFACE'), + Extension('VK_KHR_swapchain', 68, 'TU_HAS_SURFACE'), + Extension('VK_KHR_wayland_surface', 6, 'VK_USE_PLATFORM_WAYLAND_KHR'), + Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'), + Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'), + Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'), + Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), + Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'), + Extension('VK_EXT_display_surface_counter', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), + Extension('VK_EXT_display_control', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), + Extension('VK_EXT_debug_report', 9, True), + Extension('VK_KHR_external_memory_capabilities', 1, True), + Extension('VK_KHR_external_memory', 1, True), + Extension('VK_KHR_external_memory_fd', 1, True), + Extension('VK_EXT_external_memory_dma_buf', 1, True), +] + +class VkVersion: + def __init__(self, string): + split = string.split('.') + self.major = int(split[0]) + self.minor = int(split[1]) + if len(split) > 2: + assert len(split) == 3 + self.patch = int(split[2]) + else: + self.patch = None + + # Sanity check. The range bits are required by the definition of the + # VK_MAKE_VERSION macro + assert self.major < 1024 and self.minor < 1024 + assert self.patch is None or self.patch < 4096 + assert(str(self) == string) + + def __str__(self): + ver_list = [str(self.major), str(self.minor)] + if self.patch is not None: + ver_list.append(str(self.patch)) + return '.'.join(ver_list) + + def c_vk_version(self): + patch = self.patch if self.patch is not None else 0 + ver_list = [str(self.major), str(self.minor), str(patch)] + return 'VK_MAKE_VERSION(' + ', '.join(ver_list) + ')' + + def __int_ver(self): + # This is just an expansion of VK_VERSION + patch = self.patch if self.patch is not None else 0 + return (self.major << 22) | (self.minor << 12) | patch + + def __gt__(self, other): + # If only one of them has a patch version, "ignore" it by making + # other's patch version match self. + if (self.patch is None) != (other.patch is None): + other = copy.copy(other) + other.patch = self.patch + + return self.__int_ver() > other.__int_ver() + +MAX_API_VERSION = VkVersion(MAX_API_VERSION) + +def _init_exts_from_xml(xml): + """ Walk the Vulkan XML and fill out extra extension information. 
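+
+    Currently the only information taken from the registry is each
+    extension's type ('instance' or 'device').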
""" + + xml = et.parse(xml) + + ext_name_map = {} + for ext in EXTENSIONS: + ext_name_map[ext.name] = ext + + for ext_elem in xml.findall('.extensions/extension'): + ext_name = ext_elem.attrib['name'] + if ext_name not in ext_name_map: + continue + + ext = ext_name_map[ext_name] + ext.type = ext_elem.attrib['type'] + +_TEMPLATE_H = Template(COPYRIGHT + """ +#ifndef TU_EXTENSIONS_H +#define TU_EXTENSIONS_H + +enum { + TU_INSTANCE_EXTENSION_COUNT = ${len(instance_extensions)}, + TU_DEVICE_EXTENSION_COUNT = ${len(device_extensions)}, +}; + +struct tu_instance_extension_table { + union { + bool extensions[TU_INSTANCE_EXTENSION_COUNT]; + struct { +%for ext in instance_extensions: + bool ${ext.name[3:]}; +%endfor + }; + }; +}; + +struct tu_device_extension_table { + union { + bool extensions[TU_DEVICE_EXTENSION_COUNT]; + struct { +%for ext in device_extensions: + bool ${ext.name[3:]}; +%endfor + }; + }; +}; + +extern const VkExtensionProperties tu_instance_extensions[TU_INSTANCE_EXTENSION_COUNT]; +extern const VkExtensionProperties tu_device_extensions[TU_DEVICE_EXTENSION_COUNT]; +extern const struct tu_instance_extension_table tu_supported_instance_extensions; + + +struct tu_physical_device; + +void tu_fill_device_extension_table(const struct tu_physical_device *device, + struct tu_device_extension_table* table); +#endif +""") + +_TEMPLATE_C = Template(COPYRIGHT + """ +#include "tu_private.h" + +#include "vk_util.h" + +/* Convert the VK_USE_PLATFORM_* defines to booleans */ +%for platform in ['ANDROID_KHR', 'WAYLAND_KHR', 'XCB_KHR', 'XLIB_KHR', 'DISPLAY_KHR', 'XLIB_XRANDR_EXT']: +#ifdef VK_USE_PLATFORM_${platform} +# undef VK_USE_PLATFORM_${platform} +# define VK_USE_PLATFORM_${platform} true +#else +# define VK_USE_PLATFORM_${platform} false +#endif +%endfor + +/* And ANDROID too */ +#ifdef ANDROID +# undef ANDROID +# define ANDROID true +#else +# define ANDROID false +#endif + +#define TU_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || \\ + VK_USE_PLATFORM_XCB_KHR || \\ + VK_USE_PLATFORM_XLIB_KHR || \\ + VK_USE_PLATFORM_DISPLAY_KHR) + + +const VkExtensionProperties tu_instance_extensions[TU_INSTANCE_EXTENSION_COUNT] = { +%for ext in instance_extensions: + {"${ext.name}", ${ext.ext_version}}, +%endfor +}; + +const VkExtensionProperties tu_device_extensions[TU_DEVICE_EXTENSION_COUNT] = { +%for ext in device_extensions: + {"${ext.name}", ${ext.ext_version}}, +%endfor +}; + +const struct tu_instance_extension_table tu_supported_instance_extensions = { +%for ext in instance_extensions: + .${ext.name[3:]} = ${ext.enable}, +%endfor +}; + +void tu_fill_device_extension_table(const struct tu_physical_device *device, + struct tu_device_extension_table* table) +{ +%for ext in device_extensions: + table->${ext.name[3:]} = ${ext.enable}; +%endfor +} + +VkResult tu_EnumerateInstanceVersion( + uint32_t* pApiVersion) +{ + *pApiVersion = ${MAX_API_VERSION.c_vk_version()}; + return VK_SUCCESS; +} + +uint32_t +tu_physical_device_api_version(struct tu_physical_device *dev) +{ + return VK_MAKE_VERSION(1, 1, 82); +} +""") + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--out-c', help='Output C file.', required=True) + parser.add_argument('--out-h', help='Output H file.', required=True) + parser.add_argument('--xml', + help='Vulkan API XML file.', + required=True, + action='append', + dest='xml_files') + args = parser.parse_args() + + for filename in args.xml_files: + _init_exts_from_xml(filename) + + for ext in EXTENSIONS: + assert ext.type == 'instance' or ext.type == 
'device' + + template_env = { + 'MAX_API_VERSION': MAX_API_VERSION, + 'instance_extensions': [e for e in EXTENSIONS if e.type == 'instance'], + 'device_extensions': [e for e in EXTENSIONS if e.type == 'device'], + } + + with open(args.out_c, 'w') as f: + f.write(_TEMPLATE_C.render(**template_env)) + with open(args.out_h, 'w') as f: + f.write(_TEMPLATE_H.render(**template_env)) diff --git a/lib/mesa/src/freedreno/vulkan/tu_fence.c b/lib/mesa/src/freedreno/vulkan/tu_fence.c new file mode 100644 index 000000000..793f0ab3c --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_fence.c @@ -0,0 +1,381 @@ +/* + * Copyright © 2019 Google LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "tu_private.h" + +#include <fcntl.h> +#include <libsync.h> +#include <unistd.h> + +#include "util/os_time.h" + +/** + * Internally, a fence can be in one of these states. + */ +enum tu_fence_state +{ + TU_FENCE_STATE_RESET, + TU_FENCE_STATE_PENDING, + TU_FENCE_STATE_SIGNALED, +}; + +static enum tu_fence_state +tu_fence_get_state(const struct tu_fence *fence) +{ + if (fence->signaled) + assert(fence->fd < 0); + + if (fence->signaled) + return TU_FENCE_STATE_SIGNALED; + else if (fence->fd >= 0) + return TU_FENCE_STATE_PENDING; + else + return TU_FENCE_STATE_RESET; +} + +static void +tu_fence_set_state(struct tu_fence *fence, enum tu_fence_state state, int fd) +{ + if (fence->fd >= 0) + close(fence->fd); + + switch (state) { + case TU_FENCE_STATE_RESET: + assert(fd < 0); + fence->signaled = false; + fence->fd = -1; + break; + case TU_FENCE_STATE_PENDING: + assert(fd >= 0); + fence->signaled = false; + fence->fd = fd; + break; + case TU_FENCE_STATE_SIGNALED: + assert(fd < 0); + fence->signaled = true; + fence->fd = -1; + break; + default: + unreachable("unknown fence state"); + break; + } +} + +void +tu_fence_init(struct tu_fence *fence, bool signaled) +{ + fence->signaled = signaled; + fence->fd = -1; +} + +void +tu_fence_finish(struct tu_fence *fence) +{ + if (fence->fd >= 0) + close(fence->fd); +} + +/** + * Update the associated fd of a fence. Ownership of \a fd is transferred to + * \a fence. + * + * This function does not block. \a fence can also be in any state when this + * function is called. To be able to do that, the caller must make sure that, + * when both the currently associated fd and the new fd are valid, they are on + * the same timeline with the new fd being later on the timeline. 
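+ * Waiting on the new fd must then imply that the old fd has signaled.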
+ */ +void +tu_fence_update_fd(struct tu_fence *fence, int fd) +{ + const enum tu_fence_state state = + fd >= 0 ? TU_FENCE_STATE_PENDING : TU_FENCE_STATE_SIGNALED; + tu_fence_set_state(fence, state, fd); +} + +/** + * Make a fence a copy of another fence. \a fence must be in the reset state. + */ +void +tu_fence_copy(struct tu_fence *fence, const struct tu_fence *src) +{ + assert(tu_fence_get_state(fence) == TU_FENCE_STATE_RESET); + + /* dup src->fd */ + int fd = -1; + if (src->fd >= 0) { + fd = fcntl(src->fd, F_DUPFD_CLOEXEC, 0); + if (fd < 0) { + tu_loge("failed to dup fd %d for fence", src->fd); + sync_wait(src->fd, -1); + } + } + + tu_fence_update_fd(fence, fd); +} + +/** + * Signal a fence. \a fence must be in the reset state. + */ +void +tu_fence_signal(struct tu_fence *fence) +{ + assert(tu_fence_get_state(fence) == TU_FENCE_STATE_RESET); + tu_fence_set_state(fence, TU_FENCE_STATE_SIGNALED, -1); +} + +/** + * Wait until a fence is idle (i.e., not pending). + */ +void +tu_fence_wait_idle(struct tu_fence *fence) +{ + if (fence->fd >= 0) { + if (sync_wait(fence->fd, -1)) + tu_loge("sync_wait on fence fd %d failed", fence->fd); + + tu_fence_set_state(fence, TU_FENCE_STATE_SIGNALED, -1); + } +} + +VkResult +tu_CreateFence(VkDevice _device, + const VkFenceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFence *pFence) +{ + TU_FROM_HANDLE(tu_device, device, _device); + + struct tu_fence *fence = + vk_alloc2(&device->alloc, pAllocator, sizeof(*fence), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (!fence) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + tu_fence_init(fence, pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT); + + *pFence = tu_fence_to_handle(fence); + + return VK_SUCCESS; +} + +void +tu_DestroyFence(VkDevice _device, + VkFence _fence, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_fence, fence, _fence); + + if (!fence) + return; + + tu_fence_finish(fence); + + vk_free2(&device->alloc, pAllocator, fence); +} + +/** + * Initialize a pollfd array from fences. + */ +static nfds_t +tu_fence_init_poll_fds(uint32_t fence_count, + const VkFence *fences, + bool wait_all, + struct pollfd *fds) +{ + nfds_t nfds = 0; + for (uint32_t i = 0; i < fence_count; i++) { + TU_FROM_HANDLE(tu_fence, fence, fences[i]); + + if (fence->signaled) { + if (wait_all) { + /* skip signaled fences */ + continue; + } else { + /* no need to poll any fd */ + nfds = 0; + break; + } + } + + /* negative fds are never ready, which is the desired behavior */ + fds[nfds].fd = fence->fd; + fds[nfds].events = POLLIN; + fds[nfds].revents = 0; + nfds++; + } + + return nfds; +} + +/** + * Translate timeout from nanoseconds to milliseconds for poll(). + */ +static int +tu_fence_get_poll_timeout(uint64_t timeout_ns) +{ + const uint64_t ns_per_ms = 1000 * 1000; + uint64_t timeout_ms = timeout_ns / ns_per_ms; + + /* round up if needed */ + if (timeout_ns - timeout_ms * ns_per_ms >= ns_per_ms / 2) + timeout_ms++; + + return timeout_ms < INT_MAX ? timeout_ms : INT_MAX; +} + +/** + * Poll a pollfd array. 
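+ *
+ * On return, *timeout_ns has been reduced by the time spent in poll(), so
+ * the caller can poll repeatedly without exceeding its total budget.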
+ */ +static VkResult +tu_fence_poll_fds(struct pollfd *fds, nfds_t nfds, uint64_t *timeout_ns) +{ + while (true) { + /* poll */ + uint64_t duration = os_time_get_nano(); + int ret = poll(fds, nfds, tu_fence_get_poll_timeout(*timeout_ns)); + duration = os_time_get_nano() - duration; + + /* update timeout_ns */ + if (*timeout_ns > duration) + *timeout_ns -= duration; + else + *timeout_ns = 0; + + if (ret > 0) { + return VK_SUCCESS; + } else if (ret == 0) { + if (!*timeout_ns) + return VK_TIMEOUT; + } else if (errno != EINTR && errno != EAGAIN) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + } +} + +/** + * Update a pollfd array and the fence states. This should be called after a + * successful call to tu_fence_poll_fds. + */ +static nfds_t +tu_fence_update_fences_and_poll_fds(uint32_t fence_count, + const VkFence *fences, + bool wait_all, + struct pollfd *fds) +{ + uint32_t nfds = 0; + uint32_t fds_idx = 0; + for (uint32_t i = 0; i < fence_count; i++) { + TU_FROM_HANDLE(tu_fence, fence, fences[i]); + + /* no signaled fence in fds */ + if (fence->signaled) + continue; + + /* fds[fds_idx] corresponds to fences[i] */ + assert(fence->fd == fds[fds_idx].fd); + + assert(nfds <= fds_idx && fds_idx <= i); + + /* fd is ready (errors are treated as ready) */ + if (fds[fds_idx].revents) { + tu_fence_set_state(fence, TU_FENCE_STATE_SIGNALED, -1); + } else if (wait_all) { + /* add to fds again for another poll */ + fds[nfds].fd = fence->fd; + fds[nfds].events = POLLIN; + fds[nfds].revents = 0; + nfds++; + } + + fds_idx++; + } + + return nfds; +} + +VkResult +tu_WaitForFences(VkDevice _device, + uint32_t fenceCount, + const VkFence *pFences, + VkBool32 waitAll, + uint64_t timeout) +{ + TU_FROM_HANDLE(tu_device, device, _device); + + /* add a simpler path for when fenceCount == 1? */ + + struct pollfd stack_fds[8]; + struct pollfd *fds = stack_fds; + if (fenceCount > ARRAY_SIZE(stack_fds)) { + fds = vk_alloc(&device->alloc, sizeof(*fds) * fenceCount, 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!fds) + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + /* set up pollfd array and start polling */ + nfds_t nfds = tu_fence_init_poll_fds(fenceCount, pFences, waitAll, fds); + VkResult result = VK_SUCCESS; + while (nfds) { + result = tu_fence_poll_fds(fds, nfds, &timeout); + if (result != VK_SUCCESS) + break; + nfds = tu_fence_update_fences_and_poll_fds(fenceCount, pFences, waitAll, + fds); + } + + if (fds != stack_fds) + vk_free(&device->alloc, fds); + + return result; +} + +VkResult +tu_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences) +{ + for (unsigned i = 0; i < fenceCount; ++i) { + TU_FROM_HANDLE(tu_fence, fence, pFences[i]); + assert(tu_fence_get_state(fence) != TU_FENCE_STATE_PENDING); + tu_fence_set_state(fence, TU_FENCE_STATE_RESET, -1); + } + + return VK_SUCCESS; +} + +VkResult +tu_GetFenceStatus(VkDevice _device, VkFence _fence) +{ + TU_FROM_HANDLE(tu_fence, fence, _fence); + + if (fence->fd >= 0) { + int err = sync_wait(fence->fd, 0); + if (!err) + tu_fence_set_state(fence, TU_FENCE_STATE_SIGNALED, -1); + else if (err && errno != ETIME) + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + return fence->signaled ? VK_SUCCESS : VK_NOT_READY; +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_formats.c b/lib/mesa/src/freedreno/vulkan/tu_formats.c new file mode 100644 index 000000000..537b59d25 --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_formats.c @@ -0,0 +1,998 @@ + +/* + * Copyright © 2016 Red Hat. 
+ * Copyright © 2016 Bas Nieuwenhuizen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "tu_private.h" + +#include "registers/adreno_common.xml.h" +#include "registers/a6xx.xml.h" + +#include "util/format_r11g11b10f.h" +#include "util/format_srgb.h" +#include "util/u_half.h" +#include "vk_format.h" +#include "vk_util.h" + +/** + * Declare a format table. A format table is an array of tu_native_format. + * It can map a consecutive range of VkFormat to the corresponding + * tu_native_format. + * + * TU_FORMAT_TABLE_FIRST and TU_FORMAT_TABLE_LAST must already be defined and + * have the values of the first and last VkFormat of the array respectively. + */ +#define TU_FORMAT_TABLE(var) \ + static const VkFormat var##_first = TU_FORMAT_TABLE_FIRST; \ + static const VkFormat var##_last = TU_FORMAT_TABLE_LAST; \ + static const struct tu_native_format var[TU_FORMAT_TABLE_LAST - TU_FORMAT_TABLE_FIRST + 1] +#undef TU_FORMAT_TABLE_FIRST +#undef TU_FORMAT_TABLE_LAST + +#define VFMT6_x -1 +#define TFMT6_x -1 +#define RB6_x -1 + +#define TU6_FMT(vkfmt, vtxfmt, texfmt, rbfmt, swapfmt, valid) \ + [VK_FORMAT_##vkfmt - TU_FORMAT_TABLE_FIRST] = { \ + .vtx = VFMT6_##vtxfmt, \ + .tex = TFMT6_##texfmt, \ + .rb = RB6_##rbfmt, \ + .swap = swapfmt, \ + .present = valid, \ + } + +/** + * fmt/alias/swap are derived from VkFormat mechanically (and might not even + * exist). It is the macro of choice that decides whether a VkFormat is + * supported and how. 
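+ *
+ * In the TU6_* helper names below, V/T/C mark support as a vertex fetch
+ * format, a texture format, and a color (render target) format
+ * respectively; an x in a given position means that role is unsupported
+ * and the table stores -1 for it.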
+ */ +#define TU6_VTC(vk, fmt, alias, swap) TU6_FMT(vk, fmt, fmt, alias, swap, true) +#define TU6_xTC(vk, fmt, alias, swap) TU6_FMT(vk, x, fmt, alias, swap, true) +#define TU6_VTx(vk, fmt, alias, swap) TU6_FMT(vk, fmt, fmt, x, swap, true) +#define TU6_Vxx(vk, fmt, alias, swap) TU6_FMT(vk, fmt, x, x, swap, true) +#define TU6_xTx(vk, fmt, alias, swap) TU6_FMT(vk, x, fmt, x, swap, true) +#define TU6_xxx(vk, fmt, alias, swap) TU6_FMT(vk, x, x, x, WZYX, false) + +#define TU_FORMAT_TABLE_FIRST VK_FORMAT_UNDEFINED +#define TU_FORMAT_TABLE_LAST VK_FORMAT_ASTC_12x12_SRGB_BLOCK +TU_FORMAT_TABLE(tu6_format_table0) = { + TU6_xxx(UNDEFINED, x, x, x), /* 0 */ + + /* 8-bit packed */ + TU6_xxx(R4G4_UNORM_PACK8, 4_4_UNORM, R4G4_UNORM, WZXY), /* 1 */ + + /* 16-bit packed */ + TU6_xTC(R4G4B4A4_UNORM_PACK16, 4_4_4_4_UNORM, R4G4B4A4_UNORM, XYZW), /* 2 */ + TU6_xTC(B4G4R4A4_UNORM_PACK16, 4_4_4_4_UNORM, R4G4B4A4_UNORM, ZYXW), /* 3 */ + TU6_xTC(R5G6B5_UNORM_PACK16, 5_6_5_UNORM, R5G6B5_UNORM, WXYZ), /* 4 */ + TU6_xTC(B5G6R5_UNORM_PACK16, 5_6_5_UNORM, R5G6B5_UNORM, WXYZ), /* 5 */ + TU6_xxx(R5G5B5A1_UNORM_PACK16, 1_5_5_5_UNORM, A1R5G5B5_UNORM, XYZW), /* 6 */ + TU6_xxx(B5G5R5A1_UNORM_PACK16, 1_5_5_5_UNORM, A1R5G5B5_UNORM, XYZW), /* 7 */ + TU6_xTC(A1R5G5B5_UNORM_PACK16, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), /* 8 */ + + /* 8-bit R */ + TU6_VTC(R8_UNORM, 8_UNORM, R8_UNORM, WZYX), /* 9 */ + TU6_VTC(R8_SNORM, 8_SNORM, R8_SNORM, WZYX), /* 10 */ + TU6_Vxx(R8_USCALED, 8_UINT, R8_UINT, WZYX), /* 11 */ + TU6_Vxx(R8_SSCALED, 8_SINT, R8_SINT, WZYX), /* 12 */ + TU6_VTC(R8_UINT, 8_UINT, R8_UINT, WZYX), /* 13 */ + TU6_VTC(R8_SINT, 8_SINT, R8_SINT, WZYX), /* 14 */ + TU6_xTC(R8_SRGB, 8_UNORM, R8_UNORM, WZYX), /* 15 */ + + /* 16-bit RG */ + TU6_VTC(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX), /* 16 */ + TU6_VTC(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX), /* 17 */ + TU6_Vxx(R8G8_USCALED, 8_8_UINT, R8G8_UINT, WZYX), /* 18 */ + TU6_Vxx(R8G8_SSCALED, 8_8_SINT, R8G8_SINT, WZYX), /* 19 */ + TU6_VTC(R8G8_UINT, 8_8_UINT, R8G8_UINT, WZYX), /* 20 */ + TU6_VTC(R8G8_SINT, 8_8_SINT, R8G8_SINT, WZYX), /* 21 */ + TU6_xTC(R8G8_SRGB, 8_8_UNORM, R8G8_UNORM, WZYX), /* 22 */ + + /* 24-bit RGB */ + TU6_Vxx(R8G8B8_UNORM, 8_8_8_UNORM, R8G8B8_UNORM, WZYX), /* 23 */ + TU6_Vxx(R8G8B8_SNORM, 8_8_8_SNORM, R8G8B8_SNORM, WZYX), /* 24 */ + TU6_Vxx(R8G8B8_USCALED, 8_8_8_UINT, R8G8B8_UINT, WZYX), /* 25 */ + TU6_Vxx(R8G8B8_SSCALED, 8_8_8_SINT, R8G8B8_SINT, WZYX), /* 26 */ + TU6_Vxx(R8G8B8_UINT, 8_8_8_UINT, R8G8B8_UINT, WZYX), /* 27 */ + TU6_Vxx(R8G8B8_SINT, 8_8_8_SINT, R8G8B8_SINT, WZYX), /* 28 */ + TU6_xxx(R8G8B8_SRGB, 8_8_8_UNORM, R8G8B8_UNORM, WZYX), /* 29 */ + + /* 24-bit BGR */ + TU6_Vxx(B8G8R8_UNORM, 8_8_8_UNORM, R8G8B8_UNORM, WXYZ), /* 30 */ + TU6_Vxx(B8G8R8_SNORM, 8_8_8_SNORM, R8G8B8_SNORM, WXYZ), /* 31 */ + TU6_Vxx(B8G8R8_USCALED, 8_8_8_UINT, R8G8B8_UINT, WXYZ), /* 32 */ + TU6_Vxx(B8G8R8_SSCALED, 8_8_8_SINT, R8G8B8_SINT, WXYZ), /* 33 */ + TU6_Vxx(B8G8R8_UINT, 8_8_8_UINT, R8G8B8_UINT, WXYZ), /* 34 */ + TU6_Vxx(B8G8R8_SINT, 8_8_8_SINT, R8G8B8_SINT, WXYZ), /* 35 */ + TU6_xxx(B8G8R8_SRGB, 8_8_8_UNORM, R8G8B8_UNORM, WXYZ), /* 36 */ + + /* 32-bit RGBA */ + TU6_VTC(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), /* 37 */ + TU6_VTC(R8G8B8A8_SNORM, 8_8_8_8_SNORM, R8G8B8A8_SNORM, WZYX), /* 38 */ + TU6_Vxx(R8G8B8A8_USCALED, 8_8_8_8_UINT, R8G8B8A8_UINT, WZYX), /* 39 */ + TU6_Vxx(R8G8B8A8_SSCALED, 8_8_8_8_SINT, R8G8B8A8_SINT, WZYX), /* 40 */ + TU6_VTC(R8G8B8A8_UINT, 8_8_8_8_UINT, R8G8B8A8_UINT, WZYX), /* 41 */ + TU6_VTC(R8G8B8A8_SINT, 8_8_8_8_SINT, R8G8B8A8_SINT, WZYX), 
/* 42 */ + TU6_xTC(R8G8B8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), /* 43 */ + + /* 32-bit BGRA */ + TU6_VTC(B8G8R8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), /* 44 */ + TU6_VTC(B8G8R8A8_SNORM, 8_8_8_8_SNORM, R8G8B8A8_SNORM, WXYZ), /* 45 */ + TU6_Vxx(B8G8R8A8_USCALED, 8_8_8_8_UINT, R8G8B8A8_UINT, WXYZ), /* 46 */ + TU6_Vxx(B8G8R8A8_SSCALED, 8_8_8_8_SINT, R8G8B8A8_SINT, WXYZ), /* 47 */ + TU6_VTC(B8G8R8A8_UINT, 8_8_8_8_UINT, R8G8B8A8_UINT, WXYZ), /* 48 */ + TU6_VTC(B8G8R8A8_SINT, 8_8_8_8_SINT, R8G8B8A8_SINT, WXYZ), /* 49 */ + TU6_xTC(B8G8R8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), /* 50 */ + + /* 32-bit packed */ + TU6_VTC(A8B8G8R8_UNORM_PACK32, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), /* 51 */ + TU6_VTC(A8B8G8R8_SNORM_PACK32, 8_8_8_8_SNORM, R8G8B8A8_SNORM, WZYX), /* 52 */ + TU6_Vxx(A8B8G8R8_USCALED_PACK32, 8_8_8_8_UINT, R8G8B8A8_UINT, WZYX), /* 53 */ + TU6_Vxx(A8B8G8R8_SSCALED_PACK32, 8_8_8_8_SINT, R8G8B8A8_SINT, WZYX), /* 54 */ + TU6_VTC(A8B8G8R8_UINT_PACK32, 8_8_8_8_UINT, R8G8B8A8_UINT, WZYX), /* 55 */ + TU6_VTC(A8B8G8R8_SINT_PACK32, 8_8_8_8_SINT, R8G8B8A8_SINT, WZYX), /* 56 */ + TU6_xTC(A8B8G8R8_SRGB_PACK32, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), /* 57 */ + TU6_VTC(A2R10G10B10_UNORM_PACK32, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), /* 58 */ + TU6_Vxx(A2R10G10B10_SNORM_PACK32, 10_10_10_2_SNORM, R10G10B10A2_SNORM, WXYZ), /* 59 */ + TU6_Vxx(A2R10G10B10_USCALED_PACK32, 10_10_10_2_UINT, R10G10B10A2_UINT, WXYZ), /* 60 */ + TU6_Vxx(A2R10G10B10_SSCALED_PACK32, 10_10_10_2_SINT, R10G10B10A2_SINT, WXYZ), /* 61 */ + TU6_VTC(A2R10G10B10_UINT_PACK32, 10_10_10_2_UINT, R10G10B10A2_UINT, WXYZ), /* 62 */ + TU6_Vxx(A2R10G10B10_SINT_PACK32, 10_10_10_2_SINT, R10G10B10A2_SINT, WXYZ), /* 63 */ + TU6_VTC(A2B10G10R10_UNORM_PACK32, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WZYX), /* 64 */ + TU6_Vxx(A2B10G10R10_SNORM_PACK32, 10_10_10_2_SNORM, R10G10B10A2_SNORM, WZYX), /* 65 */ + TU6_Vxx(A2B10G10R10_USCALED_PACK32, 10_10_10_2_UINT, R10G10B10A2_UINT, WZYX), /* 66 */ + TU6_Vxx(A2B10G10R10_SSCALED_PACK32, 10_10_10_2_SINT, R10G10B10A2_SINT, WZYX), /* 67 */ + TU6_VTC(A2B10G10R10_UINT_PACK32, 10_10_10_2_UINT, R10G10B10A2_UINT, WZYX), /* 68 */ + TU6_Vxx(A2B10G10R10_SINT_PACK32, 10_10_10_2_SINT, R10G10B10A2_SINT, WZYX), /* 69 */ + + /* 16-bit R */ + TU6_VTC(R16_UNORM, 16_UNORM, R16_UNORM, WZYX), /* 70 */ + TU6_VTC(R16_SNORM, 16_SNORM, R16_SNORM, WZYX), /* 71 */ + TU6_Vxx(R16_USCALED, 16_UINT, R16_UINT, WZYX), /* 72 */ + TU6_Vxx(R16_SSCALED, 16_SINT, R16_SINT, WZYX), /* 73 */ + TU6_VTC(R16_UINT, 16_UINT, R16_UINT, WZYX), /* 74 */ + TU6_VTC(R16_SINT, 16_SINT, R16_SINT, WZYX), /* 75 */ + TU6_VTC(R16_SFLOAT, 16_FLOAT, R16_FLOAT, WZYX), /* 76 */ + + /* 32-bit RG */ + TU6_VTC(R16G16_UNORM, 16_16_UNORM, R16G16_UNORM, WZYX), /* 77 */ + TU6_VTC(R16G16_SNORM, 16_16_SNORM, R16G16_SNORM, WZYX), /* 78 */ + TU6_VTx(R16G16_USCALED, 16_16_UINT, R16G16_UINT, WZYX), /* 79 */ + TU6_VTx(R16G16_SSCALED, 16_16_SINT, R16G16_SINT, WZYX), /* 80 */ + TU6_VTC(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX), /* 81 */ + TU6_VTC(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX), /* 82 */ + TU6_VTC(R16G16_SFLOAT, 16_16_FLOAT, R16G16_FLOAT, WZYX), /* 83 */ + + /* 48-bit RGB */ + TU6_Vxx(R16G16B16_UNORM, 16_16_16_UNORM, R16G16B16_UNORM, WZYX), /* 84 */ + TU6_Vxx(R16G16B16_SNORM, 16_16_16_SNORM, R16G16B16_SNORM, WZYX), /* 85 */ + TU6_Vxx(R16G16B16_USCALED, 16_16_16_UINT, R16G16B16_UINT, WZYX), /* 86 */ + TU6_Vxx(R16G16B16_SSCALED, 16_16_16_SINT, R16G16B16_SINT, WZYX), /* 87 */ + TU6_Vxx(R16G16B16_UINT, 16_16_16_UINT, R16G16B16_UINT, WZYX), /* 88 */ + 
TU6_Vxx(R16G16B16_SINT, 16_16_16_SINT, R16G16B16_SINT, WZYX), /* 89 */ + TU6_Vxx(R16G16B16_SFLOAT, 16_16_16_FLOAT, R16G16B16_FLOAT, WZYX), /* 90 */ + + /* 64-bit RGBA */ + TU6_VTC(R16G16B16A16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX), /* 91 */ + TU6_VTC(R16G16B16A16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX), /* 92 */ + TU6_VTx(R16G16B16A16_USCALED, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), /* 93 */ + TU6_VTx(R16G16B16A16_SSCALED, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX), /* 94 */ + TU6_VTC(R16G16B16A16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), /* 95 */ + TU6_VTC(R16G16B16A16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX), /* 96 */ + TU6_VTC(R16G16B16A16_SFLOAT, 16_16_16_16_FLOAT, R16G16B16A16_FLOAT, WZYX), /* 97 */ + + /* 32-bit R */ + TU6_VTC(R32_UINT, 32_UINT, R32_UINT, WZYX), /* 98 */ + TU6_VTC(R32_SINT, 32_SINT, R32_SINT, WZYX), /* 99 */ + TU6_VTC(R32_SFLOAT, 32_FLOAT, R32_FLOAT, WZYX), /* 100 */ + + /* 64-bit RG */ + TU6_VTC(R32G32_UINT, 32_32_UINT, R32G32_UINT, WZYX), /* 101 */ + TU6_VTC(R32G32_SINT, 32_32_SINT, R32G32_SINT, WZYX), /* 102 */ + TU6_VTC(R32G32_SFLOAT, 32_32_FLOAT, R32G32_FLOAT, WZYX), /* 103 */ + + /* 96-bit RGB */ + TU6_VTx(R32G32B32_UINT, 32_32_32_UINT, R32G32B32_UINT, WZYX), /* 104 */ + TU6_VTx(R32G32B32_SINT, 32_32_32_SINT, R32G32B32_SINT, WZYX), /* 105 */ + TU6_VTx(R32G32B32_SFLOAT, 32_32_32_FLOAT, R32G32B32_FLOAT, WZYX), /* 106 */ + + /* 128-bit RGBA */ + TU6_VTC(R32G32B32A32_UINT, 32_32_32_32_UINT, R32G32B32A32_UINT, WZYX), /* 107 */ + TU6_VTC(R32G32B32A32_SINT, 32_32_32_32_SINT, R32G32B32A32_SINT, WZYX), /* 108 */ + TU6_VTC(R32G32B32A32_SFLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), /* 109 */ + + /* 64-bit R */ + TU6_xxx(R64_UINT, 64_UINT, R64_UINT, WZYX), /* 110 */ + TU6_xxx(R64_SINT, 64_SINT, R64_SINT, WZYX), /* 111 */ + TU6_xxx(R64_SFLOAT, 64_FLOAT, R64_FLOAT, WZYX), /* 112 */ + + /* 128-bit RG */ + TU6_xxx(R64G64_UINT, 64_64_UINT, R64G64_UINT, WZYX), /* 113 */ + TU6_xxx(R64G64_SINT, 64_64_SINT, R64G64_SINT, WZYX), /* 114 */ + TU6_xxx(R64G64_SFLOAT, 64_64_FLOAT, R64G64_FLOAT, WZYX), /* 115 */ + + /* 192-bit RGB */ + TU6_xxx(R64G64B64_UINT, 64_64_64_UINT, R64G64B64_UINT, WZYX), /* 116 */ + TU6_xxx(R64G64B64_SINT, 64_64_64_SINT, R64G64B64_SINT, WZYX), /* 117 */ + TU6_xxx(R64G64B64_SFLOAT, 64_64_64_FLOAT, R64G64B64_FLOAT, WZYX), /* 118 */ + + /* 256-bit RGBA */ + TU6_xxx(R64G64B64A64_UINT, 64_64_64_64_UINT, R64G64B64A64_UINT, WZYX), /* 119 */ + TU6_xxx(R64G64B64A64_SINT, 64_64_64_64_SINT, R64G64B64A64_SINT, WZYX), /* 120 */ + TU6_xxx(R64G64B64A64_SFLOAT, 64_64_64_64_FLOAT, R64G64B64A64_FLOAT, WZYX), /* 121 */ + + /* 32-bit packed float */ + TU6_VTC(B10G11R11_UFLOAT_PACK32, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), /* 122 */ + TU6_xTx(E5B9G9R9_UFLOAT_PACK32, 9_9_9_E5_FLOAT, R9G9B9E5_FLOAT, WZYX), /* 123 */ + + /* depth/stencil */ + TU6_xTC(D16_UNORM, 16_UNORM, R16_UNORM, WZYX), /* 124 */ + TU6_xTC(X8_D24_UNORM_PACK32, X8Z24_UNORM, X8Z24_UNORM, WZYX), /* 125 */ + TU6_xTC(D32_SFLOAT, 32_FLOAT, R32_FLOAT, WZYX), /* 126 */ + TU6_xTC(S8_UINT, 8_UINT, R8_UNORM, WZYX), /* 127 */ + TU6_xxx(D16_UNORM_S8_UINT, X8Z16_UNORM, X8Z16_UNORM, WZYX), /* 128 */ + TU6_xTC(D24_UNORM_S8_UINT, X8Z24_UNORM, X8Z24_UNORM, WZYX), /* 129 */ + TU6_xTC(D32_SFLOAT_S8_UINT, 32_FLOAT, R32_FLOAT, WZYX), /* 130 */ + + /* compressed */ + TU6_xTx(BC1_RGB_UNORM_BLOCK, DXT1, DXT1, WZYX), /* 131 */ + TU6_xTx(BC1_RGB_SRGB_BLOCK, DXT1, DXT1, WZYX), /* 132 */ + TU6_xTx(BC1_RGBA_UNORM_BLOCK, DXT1, DXT1, WZYX), /* 133 */ + TU6_xTx(BC1_RGBA_SRGB_BLOCK, DXT1, DXT1, 
WZYX), /* 134 */ + TU6_xTx(BC2_UNORM_BLOCK, DXT3, DXT3, WZYX), /* 135 */ + TU6_xTx(BC2_SRGB_BLOCK, DXT3, DXT3, WZYX), /* 136 */ + TU6_xTx(BC3_UNORM_BLOCK, DXT5, DXT5, WZYX), /* 137 */ + TU6_xTx(BC3_SRGB_BLOCK, DXT5, DXT5, WZYX), /* 138 */ + TU6_xTx(BC4_UNORM_BLOCK, RGTC1_UNORM, RGTC1_UNORM, WZYX), /* 139 */ + TU6_xTx(BC4_SNORM_BLOCK, RGTC1_SNORM, RGTC1_SNORM, WZYX), /* 140 */ + TU6_xTx(BC5_UNORM_BLOCK, RGTC2_UNORM, RGTC2_UNORM, WZYX), /* 141 */ + TU6_xTx(BC5_SNORM_BLOCK, RGTC2_SNORM, RGTC2_SNORM, WZYX), /* 142 */ + TU6_xTx(BC6H_UFLOAT_BLOCK, BPTC_UFLOAT, BPTC_UFLOAT, WZYX), /* 143 */ + TU6_xTx(BC6H_SFLOAT_BLOCK, BPTC_FLOAT, BPTC_FLOAT, WZYX), /* 144 */ + TU6_xTx(BC7_UNORM_BLOCK, BPTC, BPTC, WZYX), /* 145 */ + TU6_xTx(BC7_SRGB_BLOCK, BPTC, BPTC, WZYX), /* 146 */ + TU6_xTx(ETC2_R8G8B8_UNORM_BLOCK, ETC2_RGB8, ETC2_RGB8, WZYX), /* 147 */ + TU6_xTx(ETC2_R8G8B8_SRGB_BLOCK, ETC2_RGB8, ETC2_RGB8, WZYX), /* 148 */ + TU6_xTx(ETC2_R8G8B8A1_UNORM_BLOCK, ETC2_RGB8A1, ETC2_RGB8A1, WZYX), /* 149 */ + TU6_xTx(ETC2_R8G8B8A1_SRGB_BLOCK, ETC2_RGB8A1, ETC2_RGB8A1, WZYX), /* 150 */ + TU6_xTx(ETC2_R8G8B8A8_UNORM_BLOCK, ETC2_RGBA8, ETC2_RGBA8, WZYX), /* 151 */ + TU6_xTx(ETC2_R8G8B8A8_SRGB_BLOCK, ETC2_RGBA8, ETC2_RGBA8, WZYX), /* 152 */ + TU6_xTx(EAC_R11_UNORM_BLOCK, ETC2_R11_UNORM, ETC2_R11_UNORM, WZYX), /* 153 */ + TU6_xTx(EAC_R11_SNORM_BLOCK, ETC2_R11_SNORM, ETC2_R11_SNORM, WZYX), /* 154 */ + TU6_xTx(EAC_R11G11_UNORM_BLOCK, ETC2_RG11_UNORM, ETC2_RG11_UNORM, WZYX), /* 155 */ + TU6_xTx(EAC_R11G11_SNORM_BLOCK, ETC2_RG11_SNORM, ETC2_RG11_SNORM, WZYX), /* 156 */ + TU6_xTx(ASTC_4x4_UNORM_BLOCK, ASTC_4x4, ASTC_4x4, WZYX), /* 157 */ + TU6_xTx(ASTC_4x4_SRGB_BLOCK, ASTC_4x4, ASTC_4x4, WZYX), /* 158 */ + TU6_xTx(ASTC_5x4_UNORM_BLOCK, ASTC_5x4, ASTC_5x4, WZYX), /* 159 */ + TU6_xTx(ASTC_5x4_SRGB_BLOCK, ASTC_5x4, ASTC_5x4, WZYX), /* 160 */ + TU6_xTx(ASTC_5x5_UNORM_BLOCK, ASTC_5x5, ASTC_5x5, WZYX), /* 161 */ + TU6_xTx(ASTC_5x5_SRGB_BLOCK, ASTC_5x5, ASTC_5x5, WZYX), /* 162 */ + TU6_xTx(ASTC_6x5_UNORM_BLOCK, ASTC_6x5, ASTC_6x5, WZYX), /* 163 */ + TU6_xTx(ASTC_6x5_SRGB_BLOCK, ASTC_6x5, ASTC_6x5, WZYX), /* 164 */ + TU6_xTx(ASTC_6x6_UNORM_BLOCK, ASTC_6x6, ASTC_6x6, WZYX), /* 165 */ + TU6_xTx(ASTC_6x6_SRGB_BLOCK, ASTC_6x6, ASTC_6x6, WZYX), /* 166 */ + TU6_xTx(ASTC_8x5_UNORM_BLOCK, ASTC_8x5, ASTC_8x5, WZYX), /* 167 */ + TU6_xTx(ASTC_8x5_SRGB_BLOCK, ASTC_8x5, ASTC_8x5, WZYX), /* 168 */ + TU6_xTx(ASTC_8x6_UNORM_BLOCK, ASTC_8x6, ASTC_8x6, WZYX), /* 169 */ + TU6_xTx(ASTC_8x6_SRGB_BLOCK, ASTC_8x6, ASTC_8x6, WZYX), /* 170 */ + TU6_xTx(ASTC_8x8_UNORM_BLOCK, ASTC_8x8, ASTC_8x8, WZYX), /* 171 */ + TU6_xTx(ASTC_8x8_SRGB_BLOCK, ASTC_8x8, ASTC_8x8, WZYX), /* 172 */ + TU6_xTx(ASTC_10x5_UNORM_BLOCK, ASTC_10x5, ASTC_10x5, WZYX), /* 173 */ + TU6_xTx(ASTC_10x5_SRGB_BLOCK, ASTC_10x5, ASTC_10x5, WZYX), /* 174 */ + TU6_xTx(ASTC_10x6_UNORM_BLOCK, ASTC_10x6, ASTC_10x6, WZYX), /* 175 */ + TU6_xTx(ASTC_10x6_SRGB_BLOCK, ASTC_10x6, ASTC_10x6, WZYX), /* 176 */ + TU6_xTx(ASTC_10x8_UNORM_BLOCK, ASTC_10x8, ASTC_10x8, WZYX), /* 177 */ + TU6_xTx(ASTC_10x8_SRGB_BLOCK, ASTC_10x8, ASTC_10x8, WZYX), /* 178 */ + TU6_xTx(ASTC_10x10_UNORM_BLOCK, ASTC_10x10, ASTC_10x10, WZYX), /* 179 */ + TU6_xTx(ASTC_10x10_SRGB_BLOCK, ASTC_10x10, ASTC_10x10, WZYX), /* 180 */ + TU6_xTx(ASTC_12x10_UNORM_BLOCK, ASTC_12x10, ASTC_12x10, WZYX), /* 181 */ + TU6_xTx(ASTC_12x10_SRGB_BLOCK, ASTC_12x10, ASTC_12x10, WZYX), /* 182 */ + TU6_xTx(ASTC_12x12_UNORM_BLOCK, ASTC_12x12, ASTC_12x12, WZYX), /* 183 */ + TU6_xTx(ASTC_12x12_SRGB_BLOCK, ASTC_12x12, ASTC_12x12, WZYX), /* 184 */ +}; +#undef 
TU_FORMAT_TABLE_FIRST +#undef TU_FORMAT_TABLE_LAST + +const struct tu_native_format * +tu6_get_native_format(VkFormat format) +{ + const struct tu_native_format *fmt = NULL; + + if (format >= tu6_format_table0_first && format <= tu6_format_table0_last) + fmt = &tu6_format_table0[format - tu6_format_table0_first]; + + return (fmt && fmt->present) ? fmt : NULL; +} + +enum a6xx_2d_ifmt +tu6_rb_fmt_to_ifmt(enum a6xx_color_fmt fmt) +{ + switch (fmt) { + case RB6_A8_UNORM: + case RB6_R8_UNORM: + case RB6_R8_SNORM: + case RB6_R8G8_UNORM: + case RB6_R8G8_SNORM: + case RB6_R8G8B8A8_UNORM: + case RB6_R8G8B8_UNORM: + case RB6_R8G8B8A8_SNORM: + return R2D_UNORM8; + + case RB6_R32_UINT: + case RB6_R32_SINT: + case RB6_R32G32_UINT: + case RB6_R32G32_SINT: + case RB6_R32G32B32A32_UINT: + case RB6_R32G32B32A32_SINT: + return R2D_INT32; + + case RB6_R16_UINT: + case RB6_R16_SINT: + case RB6_R16G16_UINT: + case RB6_R16G16_SINT: + case RB6_R16G16B16A16_UINT: + case RB6_R16G16B16A16_SINT: + return R2D_INT16; + + case RB6_R8_UINT: + case RB6_R8_SINT: + case RB6_R8G8_UINT: + case RB6_R8G8_SINT: + case RB6_R8G8B8A8_UINT: + case RB6_R8G8B8A8_SINT: + return R2D_INT8; + + case RB6_R16_UNORM: + case RB6_R16_SNORM: + case RB6_R16G16_UNORM: + case RB6_R16G16_SNORM: + case RB6_R16G16B16A16_UNORM: + case RB6_R16G16B16A16_SNORM: + case RB6_R32_FLOAT: + case RB6_R32G32_FLOAT: + case RB6_R32G32B32A32_FLOAT: + return R2D_FLOAT32; + + case RB6_R16_FLOAT: + case RB6_R16G16_FLOAT: + case RB6_R16G16B16A16_FLOAT: + return R2D_FLOAT16; + + case RB6_R4G4B4A4_UNORM: + case RB6_R5G5B5A1_UNORM: + case RB6_R5G6B5_UNORM: + case RB6_R10G10B10A2_UNORM: + case RB6_R10G10B10A2_UINT: + case RB6_R11G11B10_FLOAT: + case RB6_X8Z24_UNORM: + // ??? + return 0; + default: + unreachable("bad format"); + return 0; + } +} + +static uint32_t +tu_pack_mask(int bits) +{ + assert(bits <= 32); + return (1ull << bits) - 1; +} + +static uint32_t +tu_pack_float32_for_unorm(float val, int bits) +{ + const uint32_t max = tu_pack_mask(bits); + if (val < 0.0f) + return 0; + else if (val > 1.0f) + return max; + else + return _mesa_lroundevenf(val * (float) max); +} + +static uint32_t +tu_pack_float32_for_snorm(float val, int bits) +{ + const int32_t max = tu_pack_mask(bits - 1); + int32_t tmp; + if (val < -1.0f) + tmp = -max; + else if (val > 1.0f) + tmp = max; + else + tmp = _mesa_lroundevenf(val * (float) max); + + return tmp & tu_pack_mask(bits); +} + +static uint32_t +tu_pack_float32_for_uscaled(float val, int bits) +{ + const uint32_t max = tu_pack_mask(bits); + if (val < 0.0f) + return 0; + else if (val > (float) max) + return max; + else + return (uint32_t) val; +} + +static uint32_t +tu_pack_float32_for_sscaled(float val, int bits) +{ + const int32_t max = tu_pack_mask(bits - 1); + const int32_t min = -max - 1; + int32_t tmp; + if (val < (float) min) + tmp = min; + else if (val > (float) max) + tmp = max; + else + tmp = (int32_t) val; + + return tmp & tu_pack_mask(bits); +} + +static uint32_t +tu_pack_uint32_for_uint(uint32_t val, int bits) +{ + return val & tu_pack_mask(bits); +} + +static uint32_t +tu_pack_int32_for_sint(int32_t val, int bits) +{ + return val & tu_pack_mask(bits); +} + +static uint32_t +tu_pack_float32_for_sfloat(float val, int bits) +{ + assert(bits == 16 || bits == 32); + return bits == 16 ? 
util_float_to_half(val) : fui(val); +} + +union tu_clear_component_value { + float float32; + int32_t int32; + uint32_t uint32; +}; + +static uint32_t +tu_pack_clear_component_value(union tu_clear_component_value val, + const struct vk_format_channel_description *ch) +{ + uint32_t packed; + + switch (ch->type) { + case VK_FORMAT_TYPE_UNSIGNED: + /* normalized, scaled, or pure integer */ + assert(ch->normalized + ch->scaled + ch->pure_integer == 1); + if (ch->normalized) + packed = tu_pack_float32_for_unorm(val.float32, ch->size); + else if (ch->scaled) + packed = tu_pack_float32_for_uscaled(val.float32, ch->size); + else + packed = tu_pack_uint32_for_uint(val.uint32, ch->size); + break; + case VK_FORMAT_TYPE_SIGNED: + /* normalized, scaled, or pure integer */ + assert(ch->normalized + ch->scaled + ch->pure_integer == 1); + if (ch->normalized) + packed = tu_pack_float32_for_snorm(val.float32, ch->size); + else if (ch->scaled) + packed = tu_pack_float32_for_sscaled(val.float32, ch->size); + else + packed = tu_pack_int32_for_sint(val.int32, ch->size); + break; + case VK_FORMAT_TYPE_FLOAT: + packed = tu_pack_float32_for_sfloat(val.float32, ch->size); + break; + default: + unreachable("unexpected channel type"); + packed = 0; + break; + } + + assert((packed & tu_pack_mask(ch->size)) == packed); + return packed; +} + +static const struct vk_format_channel_description * +tu_get_format_channel_description(const struct vk_format_description *desc, + int comp) +{ + switch (desc->swizzle[comp]) { + case VK_SWIZZLE_X: + return &desc->channel[0]; + case VK_SWIZZLE_Y: + return &desc->channel[1]; + case VK_SWIZZLE_Z: + return &desc->channel[2]; + case VK_SWIZZLE_W: + return &desc->channel[3]; + default: + return NULL; + } +} + +static union tu_clear_component_value +tu_get_clear_component_value(const VkClearValue *val, int comp, bool color) +{ + union tu_clear_component_value tmp; + if (color) { + assert(comp < 4); + tmp.uint32 = val->color.uint32[comp]; + } else { + assert(comp < 2); + if (comp == 0) + tmp.float32 = val->depthStencil.depth; + else + tmp.uint32 = val->depthStencil.stencil; + } + + return tmp; +} + +/** + * Pack a VkClearValue into a 128-bit buffer. \a format is respected except + * for the component order. The components are always packed in WZYX order + * (i.e., msb is white and lsb is red). + * + * Return the number of uint32_t's used. + */ +int +tu_pack_clear_value(const VkClearValue *val, VkFormat format, uint32_t buf[4]) +{ + const struct vk_format_description *desc = vk_format_description(format); + assert(desc && desc->layout == VK_FORMAT_LAYOUT_PLAIN); + + /* S8_UINT is special and has no depth */ + const int max_components = + format == VK_FORMAT_S8_UINT ? 
2 : desc->nr_channels; + + int buf_offset = 0; + int bit_shift = 0; + for (int comp = 0; comp < max_components; comp++) { + const struct vk_format_channel_description *ch = + tu_get_format_channel_description(desc, comp); + if (!ch) { + assert(format == VK_FORMAT_S8_UINT && comp == 0); + continue; + } + + union tu_clear_component_value v = tu_get_clear_component_value( + val, comp, desc->colorspace != VK_FORMAT_COLORSPACE_ZS); + + /* move to the next uint32_t when there is not enough space */ + assert(ch->size <= 32); + if (bit_shift + ch->size > 32) { + buf_offset++; + bit_shift = 0; + } + + if (bit_shift == 0) + buf[buf_offset] = 0; + + buf[buf_offset] |= tu_pack_clear_component_value(v, ch) << bit_shift; + bit_shift += ch->size; + } + + return buf_offset + 1; +} + +static void +tu_physical_device_get_format_properties( + struct tu_physical_device *physical_device, + VkFormat format, + VkFormatProperties *out_properties) +{ + VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0; + const struct vk_format_description *desc = vk_format_description(format); + const struct tu_native_format *native_fmt = tu6_get_native_format(format); + if (!desc || !native_fmt) { + out_properties->linearTilingFeatures = linear; + out_properties->optimalTilingFeatures = tiled; + out_properties->bufferFeatures = buffer; + return; + } + + linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT; + tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT; + buffer |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT; + + if (native_fmt->tex >= 0) { + linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; + } + + if (native_fmt->rb >= 0) { + linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + } + + if (native_fmt->vtx >= 0) { + buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; + } + + out_properties->linearTilingFeatures = linear; + out_properties->optimalTilingFeatures = tiled; + out_properties->bufferFeatures = buffer; +} + +void +tu_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties *pFormatProperties) +{ + TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice); + + tu_physical_device_get_format_properties(physical_device, format, + pFormatProperties); +} + +void +tu_GetPhysicalDeviceFormatProperties2( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties2 *pFormatProperties) +{ + TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice); + + tu_physical_device_get_format_properties( + physical_device, format, &pFormatProperties->formatProperties); +} + +static VkResult +tu_get_image_format_properties( + struct tu_physical_device *physical_device, + const VkPhysicalDeviceImageFormatInfo2 *info, + VkImageFormatProperties *pImageFormatProperties) + +{ + VkFormatProperties format_props; + VkFormatFeatureFlags format_feature_flags; + VkExtent3D maxExtent; + uint32_t maxMipLevels; + uint32_t maxArraySize; + VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT; + + tu_physical_device_get_format_properties(physical_device, info->format, + &format_props); + if (info->tiling == VK_IMAGE_TILING_LINEAR) { + format_feature_flags = format_props.linearTilingFeatures; + } else if (info->tiling == VK_IMAGE_TILING_OPTIMAL) { + format_feature_flags = format_props.optimalTilingFeatures; + } else { + unreachable("bad 
VkImageTiling"); + } + + if (format_feature_flags == 0) + goto unsupported; + + if (info->type != VK_IMAGE_TYPE_2D && + vk_format_is_depth_or_stencil(info->format)) + goto unsupported; + + switch (info->type) { + default: + unreachable("bad vkimage type\n"); + case VK_IMAGE_TYPE_1D: + maxExtent.width = 16384; + maxExtent.height = 1; + maxExtent.depth = 1; + maxMipLevels = 15; /* log2(maxWidth) + 1 */ + maxArraySize = 2048; + break; + case VK_IMAGE_TYPE_2D: + maxExtent.width = 16384; + maxExtent.height = 16384; + maxExtent.depth = 1; + maxMipLevels = 15; /* log2(maxWidth) + 1 */ + maxArraySize = 2048; + break; + case VK_IMAGE_TYPE_3D: + maxExtent.width = 2048; + maxExtent.height = 2048; + maxExtent.depth = 2048; + maxMipLevels = 12; /* log2(maxWidth) + 1 */ + maxArraySize = 1; + break; + } + + if (info->tiling == VK_IMAGE_TILING_OPTIMAL && + info->type == VK_IMAGE_TYPE_2D && + (format_feature_flags & + (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) && + !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && + !(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { + sampleCounts |= VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | + VK_SAMPLE_COUNT_8_BIT; + } + + if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + goto unsupported; + } + } + + if (info->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { + goto unsupported; + } + } + + if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { + goto unsupported; + } + } + + if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + if (!(format_feature_flags & + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { + goto unsupported; + } + } + + *pImageFormatProperties = (VkImageFormatProperties) { + .maxExtent = maxExtent, + .maxMipLevels = maxMipLevels, + .maxArrayLayers = maxArraySize, + .sampleCounts = sampleCounts, + + /* FINISHME: Accurately calculate + * VkImageFormatProperties::maxResourceSize. 
+ */ + .maxResourceSize = UINT32_MAX, + }; + + return VK_SUCCESS; +unsupported: + *pImageFormatProperties = (VkImageFormatProperties) { + .maxExtent = { 0, 0, 0 }, + .maxMipLevels = 0, + .maxArrayLayers = 0, + .sampleCounts = 0, + .maxResourceSize = 0, + }; + + return VK_ERROR_FORMAT_NOT_SUPPORTED; +} + +VkResult +tu_GetPhysicalDeviceImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkImageTiling tiling, + VkImageUsageFlags usage, + VkImageCreateFlags createFlags, + VkImageFormatProperties *pImageFormatProperties) +{ + TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice); + + const VkPhysicalDeviceImageFormatInfo2 info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, + .pNext = NULL, + .format = format, + .type = type, + .tiling = tiling, + .usage = usage, + .flags = createFlags, + }; + + return tu_get_image_format_properties(physical_device, &info, + pImageFormatProperties); +} + +static VkResult +tu_get_external_image_format_properties( + const struct tu_physical_device *physical_device, + const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo, + VkExternalMemoryHandleTypeFlagBits handleType, + VkExternalMemoryProperties *external_properties) +{ + VkExternalMemoryFeatureFlagBits flags = 0; + VkExternalMemoryHandleTypeFlags export_flags = 0; + VkExternalMemoryHandleTypeFlags compat_flags = 0; + + /* From the Vulkan 1.1.98 spec: + * + * If handleType is not compatible with the format, type, tiling, + * usage, and flags specified in VkPhysicalDeviceImageFormatInfo2, + * then vkGetPhysicalDeviceImageFormatProperties2 returns + * VK_ERROR_FORMAT_NOT_SUPPORTED. + */ + + switch (handleType) { + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: + switch (pImageFormatInfo->type) { + case VK_IMAGE_TYPE_2D: + flags = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT | + VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; + compat_flags = export_flags = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | + VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + break; + default: + return vk_errorf(physical_device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, + "VkExternalMemoryTypeFlagBits(0x%x) unsupported for VkImageType(%d)", + handleType, pImageFormatInfo->type); + } + break; + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: + flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; + compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; + break; + default: + return vk_errorf(physical_device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, + "VkExternalMemoryTypeFlagBits(0x%x) unsupported", + handleType); + } + + *external_properties = (VkExternalMemoryProperties) { + .externalMemoryFeatures = flags, + .exportFromImportedHandleTypes = export_flags, + .compatibleHandleTypes = compat_flags, + }; + + return VK_SUCCESS; +} + +VkResult +tu_GetPhysicalDeviceImageFormatProperties2( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceImageFormatInfo2 *base_info, + VkImageFormatProperties2 *base_props) +{ + TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice); + const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL; + VkExternalImageFormatProperties *external_props = NULL; + VkResult result; + + result = tu_get_image_format_properties( + physical_device, base_info, &base_props->imageFormatProperties); + if (result != VK_SUCCESS) + return result; + + /* Extract input structs */ + 
vk_foreach_struct_const(s, base_info->pNext) + { + switch (s->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO: + external_info = (const void *) s; + break; + default: + break; + } + } + + /* Extract output structs */ + vk_foreach_struct(s, base_props->pNext) + { + switch (s->sType) { + case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES: + external_props = (void *) s; + break; + default: + break; + } + } + + /* From the Vulkan 1.0.42 spec: + * + * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will + * behave as if VkPhysicalDeviceExternalImageFormatInfo was not + * present and VkExternalImageFormatProperties will be ignored. + */ + if (external_info && external_info->handleType != 0) { + result = tu_get_external_image_format_properties( + physical_device, base_info, external_info->handleType, + &external_props->externalMemoryProperties); + if (result != VK_SUCCESS) + goto fail; + } + + return VK_SUCCESS; + +fail: + if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) { + /* From the Vulkan 1.0.42 spec: + * + * If the combination of parameters to + * vkGetPhysicalDeviceImageFormatProperties2 is not supported by + * the implementation for use in vkCreateImage, then all members of + * imageFormatProperties will be filled with zero. + */ + base_props->imageFormatProperties = (VkImageFormatProperties) { 0 }; + } + + return result; +} + +void +tu_GetPhysicalDeviceSparseImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + uint32_t samples, + VkImageUsageFlags usage, + VkImageTiling tiling, + uint32_t *pNumProperties, + VkSparseImageFormatProperties *pProperties) +{ + /* Sparse images are not yet supported. */ + *pNumProperties = 0; +} + +void +tu_GetPhysicalDeviceSparseImageFormatProperties2( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo, + uint32_t *pPropertyCount, + VkSparseImageFormatProperties2 *pProperties) +{ + /* Sparse images are not yet supported. 
*/ + *pPropertyCount = 0; +} + +void +tu_GetPhysicalDeviceExternalBufferProperties( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo, + VkExternalBufferProperties *pExternalBufferProperties) +{ + VkExternalMemoryFeatureFlagBits flags = 0; + VkExternalMemoryHandleTypeFlags export_flags = 0; + VkExternalMemoryHandleTypeFlags compat_flags = 0; + switch (pExternalBufferInfo->handleType) { + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: + flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; + compat_flags = export_flags = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | + VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + break; + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: + flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; + compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; + break; + default: + break; + } + pExternalBufferProperties->externalMemoryProperties = + (VkExternalMemoryProperties) { + .externalMemoryFeatures = flags, + .exportFromImportedHandleTypes = export_flags, + .compatibleHandleTypes = compat_flags, + }; +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_icd.py b/lib/mesa/src/freedreno/vulkan/tu_icd.py new file mode 100644 index 000000000..1947a969a --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_icd.py @@ -0,0 +1,47 @@ +# Copyright 2017 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sub license, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial portions +# of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR +# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import json +import os.path + +from tu_extensions import * + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--out', help='Output json file.', required=True) + parser.add_argument('--lib-path', help='Path to libvulkan_freedreno.so') + args = parser.parse_args() + + path = 'libvulkan_freedreno.so' + if args.lib_path: + path = os.path.join(args.lib_path, path) + + json_data = { + 'file_format_version': '1.0.0', + 'ICD': { + 'library_path': path, + 'api_version': str(MAX_API_VERSION), + }, + } + + with open(args.out, 'w') as f: + json.dump(json_data, f, indent = 4, sort_keys=True, separators=(',', ': ')) diff --git a/lib/mesa/src/freedreno/vulkan/tu_image.c b/lib/mesa/src/freedreno/vulkan/tu_image.c new file mode 100644 index 000000000..657612d42 --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_image.c @@ -0,0 +1,380 @@ +/* + * Copyright © 2016 Red Hat. 
+ * Copyright © 2016 Bas Nieuwenhuizen + * + * based in part on anv driver which is: + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "tu_private.h" + +#include "util/debug.h" +#include "util/u_atomic.h" +#include "vk_format.h" +#include "vk_util.h" + +static inline bool +image_level_linear(struct tu_image *image, int level) +{ + unsigned w = u_minify(image->extent.width, level); + return w < 16; +} + +/* indexed by cpp: */ +static const struct +{ + unsigned pitchalign; + unsigned heightalign; +} tile_alignment[] = { + [1] = { 128, 32 }, [2] = { 128, 16 }, [3] = { 128, 16 }, [4] = { 64, 16 }, + [8] = { 64, 16 }, [12] = { 64, 16 }, [16] = { 64, 16 }, +}; + +static void +setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo) +{ + enum vk_format_layout layout = + vk_format_description(pCreateInfo->format)->layout; + uint32_t layer_size = 0; + uint32_t width = pCreateInfo->extent.width; + uint32_t height = pCreateInfo->extent.height; + uint32_t depth = pCreateInfo->extent.depth; + bool layer_first = pCreateInfo->imageType != VK_IMAGE_TYPE_3D; + uint32_t alignment = pCreateInfo->imageType == VK_IMAGE_TYPE_3D ? 4096 : 1; + uint32_t cpp = vk_format_get_blocksize(pCreateInfo->format); + + uint32_t heightalign = tile_alignment[cpp].heightalign; + + for (unsigned level = 0; level < pCreateInfo->mipLevels; level++) { + struct tu_image_level *slice = &image->levels[level]; + bool linear_level = image_level_linear(image, level); + uint32_t aligned_height = height; + uint32_t blocks; + uint32_t pitchalign; + + if (image->tile_mode && !linear_level) { + pitchalign = tile_alignment[cpp].pitchalign; + aligned_height = align(aligned_height, heightalign); + } else { + pitchalign = 64; + + /* The blits used for mem<->gmem work at a granularity of + * 32x32, which can cause faults due to over-fetch on the + * last level. The simple solution is to over-allocate a + * bit the last level to ensure any over-fetch is harmless. 
+ * The pitch is already sufficiently aligned, but height + * may not be: + */ + if ((level + 1 == pCreateInfo->mipLevels)) + aligned_height = align(aligned_height, 32); + } + + if (layout == VK_FORMAT_LAYOUT_ASTC) + slice->pitch = util_align_npot( + width, + pitchalign * vk_format_get_blockwidth(pCreateInfo->format)); + else + slice->pitch = align(width, pitchalign); + + slice->offset = layer_size; + blocks = vk_format_get_block_count(pCreateInfo->format, slice->pitch, + aligned_height); + + /* 1d array and 2d array textures must all have the same layer size + * for each miplevel on a3xx. 3d textures can have different layer + * sizes for high levels, but the hw auto-sizer is buggy (or at least + * different than what this code does), so as soon as the layer size + * range gets into range, we stop reducing it. + */ + if (pCreateInfo->imageType == VK_IMAGE_TYPE_3D && + (level == 1 || + (level > 1 && image->levels[level - 1].size > 0xf000))) + slice->size = align(blocks * cpp, alignment); + else if (level == 0 || layer_first || alignment == 1) + slice->size = align(blocks * cpp, alignment); + else + slice->size = image->levels[level - 1].size; + + layer_size += slice->size * depth; + + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + } + + image->layer_size = layer_size; +} + +VkResult +tu_image_create(VkDevice _device, + const struct tu_image_create_info *create_info, + const VkAllocationCallbacks *alloc, + VkImage *pImage) +{ + TU_FROM_HANDLE(tu_device, device, _device); + const VkImageCreateInfo *pCreateInfo = create_info->vk_info; + struct tu_image *image = NULL; + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); + + tu_assert(pCreateInfo->mipLevels > 0); + tu_assert(pCreateInfo->arrayLayers > 0); + tu_assert(pCreateInfo->samples > 0); + tu_assert(pCreateInfo->extent.width > 0); + tu_assert(pCreateInfo->extent.height > 0); + tu_assert(pCreateInfo->extent.depth > 0); + + image = vk_zalloc2(&device->alloc, alloc, sizeof(*image), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!image) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + image->type = pCreateInfo->imageType; + + image->vk_format = pCreateInfo->format; + image->tiling = pCreateInfo->tiling; + image->usage = pCreateInfo->usage; + image->flags = pCreateInfo->flags; + image->extent = pCreateInfo->extent; + image->level_count = pCreateInfo->mipLevels; + image->layer_count = pCreateInfo->arrayLayers; + + image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE; + if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) { + for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i) + if (pCreateInfo->pQueueFamilyIndices[i] == + VK_QUEUE_FAMILY_EXTERNAL) + image->queue_family_mask |= (1u << TU_MAX_QUEUE_FAMILIES) - 1u; + else + image->queue_family_mask |= + 1u << pCreateInfo->pQueueFamilyIndices[i]; + } + + image->shareable = + vk_find_struct_const(pCreateInfo->pNext, + EXTERNAL_MEMORY_IMAGE_CREATE_INFO) != NULL; + + image->tile_mode = pCreateInfo->tiling == VK_IMAGE_TILING_OPTIMAL ? 
3 : 0; + setup_slices(image, pCreateInfo); + + image->size = image->layer_size * pCreateInfo->arrayLayers; + *pImage = tu_image_to_handle(image); + + return VK_SUCCESS; +} + +void +tu_image_view_init(struct tu_image_view *iview, + struct tu_device *device, + const VkImageViewCreateInfo *pCreateInfo) +{ + TU_FROM_HANDLE(tu_image, image, pCreateInfo->image); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + + switch (image->type) { + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + assert(range->baseArrayLayer + tu_get_layerCount(image, range) <= + image->layer_count); + break; + case VK_IMAGE_TYPE_3D: + assert(range->baseArrayLayer + tu_get_layerCount(image, range) <= + tu_minify(image->extent.depth, range->baseMipLevel)); + break; + default: + unreachable("bad VkImageType"); + } + + iview->image = image; + iview->type = pCreateInfo->viewType; + iview->vk_format = pCreateInfo->format; + iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; + + if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) { + iview->vk_format = vk_format_stencil_only(iview->vk_format); + } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) { + iview->vk_format = vk_format_depth_only(iview->vk_format); + } + + // should we minify? + iview->extent = image->extent; + + iview->base_layer = range->baseArrayLayer; + iview->layer_count = tu_get_layerCount(image, range); + iview->base_mip = range->baseMipLevel; + iview->level_count = tu_get_levelCount(image, range); +} + +unsigned +tu_image_queue_family_mask(const struct tu_image *image, + uint32_t family, + uint32_t queue_family) +{ + if (!image->exclusive) + return image->queue_family_mask; + if (family == VK_QUEUE_FAMILY_EXTERNAL) + return (1u << TU_MAX_QUEUE_FAMILIES) - 1u; + if (family == VK_QUEUE_FAMILY_IGNORED) + return 1u << queue_family; + return 1u << family; +} + +VkResult +tu_CreateImage(VkDevice device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImage *pImage) +{ +#ifdef ANDROID + const VkNativeBufferANDROID *gralloc_info = + vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID); + + if (gralloc_info) + return tu_image_from_gralloc(device, pCreateInfo, gralloc_info, + pAllocator, pImage); +#endif + + return tu_image_create(device, + &(struct tu_image_create_info) { + .vk_info = pCreateInfo, + .scanout = false, + }, + pAllocator, pImage); +} + +void +tu_DestroyImage(VkDevice _device, + VkImage _image, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_image, image, _image); + + if (!image) + return; + + if (image->owned_memory != VK_NULL_HANDLE) + tu_FreeMemory(_device, image->owned_memory, pAllocator); + + vk_free2(&device->alloc, pAllocator, image); +} + +void +tu_GetImageSubresourceLayout(VkDevice _device, + VkImage _image, + const VkImageSubresource *pSubresource, + VkSubresourceLayout *pLayout) +{ + TU_FROM_HANDLE(tu_image, image, _image); + + const uint32_t layer_offset = image->layer_size * pSubresource->arrayLayer; + const struct tu_image_level *level = + image->levels + pSubresource->mipLevel; + + pLayout->offset = layer_offset + level->offset; + pLayout->size = level->size; + pLayout->rowPitch = + level->pitch * vk_format_get_blocksize(image->vk_format); + pLayout->arrayPitch = image->layer_size; + pLayout->depthPitch = level->size; +} + +VkResult +tu_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView) +{ + 
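+   /* Host-side bookkeeping only: allocate the view and fill it in from
+    * pCreateInfo via tu_image_view_init(); no GPU commands are emitted
+    * here. */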
TU_FROM_HANDLE(tu_device, device, _device); + struct tu_image_view *view; + + view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (view == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + tu_image_view_init(view, device, pCreateInfo); + + *pView = tu_image_view_to_handle(view); + + return VK_SUCCESS; +} + +void +tu_DestroyImageView(VkDevice _device, + VkImageView _iview, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_image_view, iview, _iview); + + if (!iview) + return; + vk_free2(&device->alloc, pAllocator, iview); +} + +void +tu_buffer_view_init(struct tu_buffer_view *view, + struct tu_device *device, + const VkBufferViewCreateInfo *pCreateInfo) +{ + TU_FROM_HANDLE(tu_buffer, buffer, pCreateInfo->buffer); + + view->range = pCreateInfo->range == VK_WHOLE_SIZE + ? buffer->size - pCreateInfo->offset + : pCreateInfo->range; + view->vk_format = pCreateInfo->format; +} + +VkResult +tu_CreateBufferView(VkDevice _device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBufferView *pView) +{ + TU_FROM_HANDLE(tu_device, device, _device); + struct tu_buffer_view *view; + + view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!view) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + tu_buffer_view_init(view, device, pCreateInfo); + + *pView = tu_buffer_view_to_handle(view); + + return VK_SUCCESS; +} + +void +tu_DestroyBufferView(VkDevice _device, + VkBufferView bufferView, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_buffer_view, view, bufferView); + + if (!view) + return; + + vk_free2(&device->alloc, pAllocator, view); +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_meta_blit.c b/lib/mesa/src/freedreno/vulkan/tu_meta_blit.c new file mode 100644 index 000000000..da5ff6b12 --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_meta_blit.c @@ -0,0 +1,39 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "tu_private.h" + +#include "nir/nir_builder.h" + +void +tu_CmdBlitImage(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageBlit *pRegions, + VkFilter filter) + +{ +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_meta_buffer.c b/lib/mesa/src/freedreno/vulkan/tu_meta_buffer.c new file mode 100644 index 000000000..98a73413c --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_meta_buffer.c @@ -0,0 +1,19 @@ +#include "tu_private.h" + +void +tu_CmdFillBuffer(VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize fillSize, + uint32_t data) +{ +} + +void +tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize dataSize, + const void *pData) +{ +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_meta_clear.c b/lib/mesa/src/freedreno/vulkan/tu_meta_clear.c new file mode 100644 index 000000000..2beed5433 --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_meta_clear.c @@ -0,0 +1,53 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "tu_private.h" + +void +tu_CmdClearColorImage(VkCommandBuffer commandBuffer, + VkImage image_h, + VkImageLayout imageLayout, + const VkClearColorValue *pColor, + uint32_t rangeCount, + const VkImageSubresourceRange *pRanges) +{ +} + +void +tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, + VkImage image_h, + VkImageLayout imageLayout, + const VkClearDepthStencilValue *pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange *pRanges) +{ +} + +void +tu_CmdClearAttachments(VkCommandBuffer commandBuffer, + uint32_t attachmentCount, + const VkClearAttachment *pAttachments, + uint32_t rectCount, + const VkClearRect *pRects) +{ +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_meta_copy.c b/lib/mesa/src/freedreno/vulkan/tu_meta_copy.c new file mode 100644 index 000000000..ced8661d5 --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_meta_copy.c @@ -0,0 +1,690 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "tu_private.h" + +#include "a6xx.xml.h" +#include "adreno_common.xml.h" +#include "adreno_pm4.xml.h" + +#include "vk_format.h" + +#include "tu_cs.h" + +/* + * TODO: + * - image -> image copies + * - 3D textures + * - compressed image formats (need to divide offset/extent) + */ + +static uint32_t +blit_control(enum a6xx_color_fmt fmt) +{ + unsigned blit_cntl = 0xf00000; + blit_cntl |= A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt); + blit_cntl |= A6XX_RB_2D_BLIT_CNTL_IFMT(tu6_rb_fmt_to_ifmt(fmt)); + return blit_cntl; +} + +static uint32_t tu6_sp_2d_src_format(VkFormat format) +{ + const struct vk_format_description *desc = vk_format_description(format); + uint32_t reg = 0xf000 | A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(tu6_get_native_format(format)->rb); + + int channel = vk_format_get_first_non_void_channel(format); + if (channel < 0) { + /* TODO special format. 
*/ + return reg; + } + if (desc->channel[channel].normalized) { + if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED) + reg |= A6XX_SP_2D_SRC_FORMAT_SINT; + reg |= A6XX_SP_2D_SRC_FORMAT_NORM; + } else if (desc->channel[channel].pure_integer) { + if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED) + reg |= A6XX_SP_2D_SRC_FORMAT_SINT; + else + reg |= A6XX_SP_2D_SRC_FORMAT_UINT; + } + return reg; +} + +static void +tu_dma_prepare(struct tu_cmd_buffer *cmdbuf) +{ + tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 10); + + tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); + tu_cs_emit(&cmdbuf->cs, PC_CCU_INVALIDATE_COLOR); + + tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); + tu_cs_emit(&cmdbuf->cs, LRZ_FLUSH); + + tu_cs_emit_pkt7(&cmdbuf->cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1); + tu_cs_emit(&cmdbuf->cs, 0x0); + + tu_cs_emit_wfi(&cmdbuf->cs); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_CCU_CNTL, 1); + tu_cs_emit(&cmdbuf->cs, 0x10000000); +} + +static void +tu_copy_buffer(struct tu_cmd_buffer *cmdbuf, + struct tu_bo *src_bo, + uint64_t src_offset, + struct tu_bo *dst_bo, + uint64_t dst_offset, + uint64_t size) +{ + const unsigned max_size_per_iter = 0x4000 - 0x40; + const unsigned max_iterations = + (size + max_size_per_iter) / max_size_per_iter; + + tu_bo_list_add(&cmdbuf->bo_list, src_bo, MSM_SUBMIT_BO_READ); + tu_bo_list_add(&cmdbuf->bo_list, dst_bo, MSM_SUBMIT_BO_WRITE); + + tu_dma_prepare(cmdbuf); + + tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 21 + 48 * max_iterations); + + /* buffer copy setup */ + tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1); + tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); + + const uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000; + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); + tu_cs_emit(&cmdbuf->cs, blit_cntl); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); + tu_cs_emit(&cmdbuf->cs, blit_cntl); + + for (; size;) { + uint64_t src_va = src_bo->iova + src_offset; + uint64_t dst_va = dst_bo->iova + dst_offset; + + unsigned src_shift = src_va & 0x3f; + unsigned dst_shift = dst_va & 0x3f; + unsigned max_shift = MAX2(src_shift, dst_shift); + + src_va -= src_shift; + dst_va -= dst_shift; + + uint32_t size_todo = MIN2(0x4000 - max_shift, size); + unsigned pitch = (size_todo + max_shift + 63) & ~63; + + /* + * Emit source: + */ + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13); + tu_cs_emit(&cmdbuf->cs, + A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(RB6_R8_UNORM) | + A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000); + tu_cs_emit(&cmdbuf->cs, + A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_shift + size_todo) | + A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */ + tu_cs_emit_qw(&cmdbuf->cs, src_va); + tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch)); + + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + + /* + * Emit destination: + */ + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9); + tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R8_UNORM) | + A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); + tu_cs_emit_qw(&cmdbuf->cs, dst_va); + + tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(pitch)); + 
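+   /* the remaining dwords of the 9-dword RB_2D_DST group are unused for
+    * this linear, single-row blit and are left zero */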
tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + + /* + * Blit command: + */ + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4); + tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_shift)); + tu_cs_emit(&cmdbuf->cs, + A6XX_GRAS_2D_SRC_BR_X_X(src_shift + size_todo - 1)); + tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0)); + tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_BR_Y_Y(0)); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2); + tu_cs_emit(&cmdbuf->cs, + A6XX_GRAS_2D_DST_TL_X(dst_shift) | A6XX_GRAS_2D_DST_TL_Y(0)); + tu_cs_emit(&cmdbuf->cs, + A6XX_GRAS_2D_DST_BR_X(dst_shift + size_todo - 1) | + A6XX_GRAS_2D_DST_BR_Y(0)); + + tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); + tu_cs_emit(&cmdbuf->cs, 0x3f); + tu_cs_emit_wfi(&cmdbuf->cs); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1); + tu_cs_emit(&cmdbuf->cs, 0); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1); + tu_cs_emit(&cmdbuf->cs, 0xf180); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); + tu_cs_emit(&cmdbuf->cs, 0x01000000); + + tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1); + tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); + + tu_cs_emit_wfi(&cmdbuf->cs); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); + tu_cs_emit(&cmdbuf->cs, 0); + + src_offset += size_todo; + dst_offset += size_todo; + size -= size_todo; + } + + tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true); + tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true); + tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true); +} + +static void +tu_copy_buffer_to_image_step(struct tu_cmd_buffer *cmdbuf, + struct tu_buffer *src_buffer, + struct tu_image *dst_image, + const VkBufferImageCopy *copy_info, + VkFormat format, + uint32_t layer, + uint64_t src_va) +{ + const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb; + + uint64_t dst_va = dst_image->bo->iova + dst_image->bo_offset + dst_image->layer_size * layer + dst_image->levels[copy_info->imageSubresource.mipLevel].offset; + unsigned dst_pitch = dst_image->levels[copy_info->imageSubresource.mipLevel].pitch * + vk_format_get_blocksize(format); + + unsigned src_pitch; + unsigned src_offset = 0; + if (copy_info->imageExtent.height == 1) { + /* Can't find this in the spec, but not having it is sort of insane? */ + assert(src_va % vk_format_get_blocksize(format) == 0); + + src_offset = (src_va & 63) / vk_format_get_blocksize(format); + src_va &= ~63; + + src_pitch = align((src_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64); + } else { + unsigned src_pixel_stride = copy_info->bufferRowLength + ? 
copy_info->bufferRowLength + : copy_info->imageExtent.width; + src_pitch = src_pixel_stride * vk_format_get_blocksize(format); + assert(!(src_pitch & 63)); + assert(!(src_va & 63)); + } + + tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48); + + /* + * Emit source: + */ + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13); + tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) | + A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | + 0x500000); + tu_cs_emit(&cmdbuf->cs, + A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_offset + copy_info->imageExtent.width) | + A6XX_SP_PS_2D_SRC_SIZE_HEIGHT( + copy_info->imageExtent.height)); /* SP_PS_2D_SRC_SIZE */ + tu_cs_emit_qw(&cmdbuf->cs, src_va); + tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch)); + + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + + /* + * Emit destination: + */ + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9); + tu_cs_emit(&cmdbuf->cs, + A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) | + A6XX_RB_2D_DST_INFO_TILE_MODE(dst_image->tile_mode) | + A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); + tu_cs_emit_qw(&cmdbuf->cs, dst_va); + tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch)); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4); + tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_offset)); + tu_cs_emit(&cmdbuf->cs, + A6XX_GRAS_2D_SRC_BR_X_X(src_offset + copy_info->imageExtent.width - 1)); + tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0)); + tu_cs_emit(&cmdbuf->cs, + A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageExtent.height - 1)); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2); + tu_cs_emit(&cmdbuf->cs, + A6XX_GRAS_2D_DST_TL_X(copy_info->imageOffset.x) | + A6XX_GRAS_2D_DST_TL_Y(copy_info->imageOffset.y)); + tu_cs_emit(&cmdbuf->cs, + A6XX_GRAS_2D_DST_BR_X(copy_info->imageOffset.x + + copy_info->imageExtent.width - 1) | + A6XX_GRAS_2D_DST_BR_Y(copy_info->imageOffset.y + + copy_info->imageExtent.height - 1)); + + tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); + tu_cs_emit(&cmdbuf->cs, 0x3f); + tu_cs_emit_wfi(&cmdbuf->cs); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1); + tu_cs_emit(&cmdbuf->cs, 0); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1); + tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format)); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); + tu_cs_emit(&cmdbuf->cs, 0x01000000); + + tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1); + tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); + + tu_cs_emit_wfi(&cmdbuf->cs); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); + tu_cs_emit(&cmdbuf->cs, 0); +} + +static void +tu_copy_buffer_to_image(struct tu_cmd_buffer *cmdbuf, + struct tu_buffer *src_buffer, + struct tu_image *dst_image, + const VkBufferImageCopy *copy_info) +{ + tu_bo_list_add(&cmdbuf->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ); + tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE); + + /* general setup */ + tu_dma_prepare(cmdbuf); + + tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6); + 
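+ /*
+ * The 2D engine wants 64-byte-aligned addresses and pitches. The loop
+ * below copies a whole layer in one blit when the source satisfies
+ * that, and otherwise falls back to one blit per row, letting
+ * tu_copy_buffer_to_image_step() absorb the misalignment. (This is a
+ * summary of the code that follows, not a documented hardware rule.)
+ */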
+ /* buffer copy setup */
+ tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
+ tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
+
+ VkFormat format = dst_image->vk_format;
+ const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
+
+ const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
+
+ tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
+ tu_cs_emit(&cmdbuf->cs, blit_cntl);
+
+ tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
+ tu_cs_emit(&cmdbuf->cs, blit_cntl);
+
+ unsigned src_pixel_stride = copy_info->bufferRowLength
+ ? copy_info->bufferRowLength
+ : copy_info->imageExtent.width;
+ unsigned cpp = vk_format_get_blocksize(format);
+ unsigned src_pitch = src_pixel_stride * cpp;
+
+ for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) {
+ unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset;
+ uint64_t src_va = src_buffer->bo->iova + src_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * src_pitch;
+
+ if ((src_pitch & 63) || (src_va & 63)) {
+ /* Do a per line copy */
+ VkBufferImageCopy line_copy_info = *copy_info;
+ line_copy_info.imageExtent.height = 1;
+ for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) {
+ /*
+ * If src_va is not aligned, the line copy will need to adjust. Give
+ * it room to do so.
+ */
+ unsigned max_width = 16384 - ((src_va & 0x3f) ? 64 : 0);
+ line_copy_info.imageOffset.x = copy_info->imageOffset.x;
+ line_copy_info.imageExtent.width = copy_info->imageExtent.width;
+
+ for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) {
+ tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, &line_copy_info, format, layer, src_va + c * cpp);
+
+ line_copy_info.imageOffset.x += max_width;
+ line_copy_info.imageExtent.width -= max_width;
+ }
+
+ line_copy_info.imageOffset.y++;
+ src_va += src_pitch;
+ }
+ } else {
+ tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, copy_info, format, layer, src_va);
+ }
+ }
+
+ tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
+
+ tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
+ tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
+ tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
+}
+
+static void
+tu_copy_image_to_buffer_step(struct tu_cmd_buffer *cmdbuf,
+ struct tu_image *src_image,
+ struct tu_buffer *dst_buffer,
+ const VkBufferImageCopy *copy_info,
+ VkFormat format,
+ uint32_t layer,
+ uint64_t dst_va)
+{
+ const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
+
+ uint64_t src_va = src_image->bo->iova + src_image->bo_offset + src_image->layer_size * layer + src_image->levels[copy_info->imageSubresource.mipLevel].offset;
+ unsigned src_pitch = src_image->levels[copy_info->imageSubresource.mipLevel].pitch *
+ vk_format_get_blocksize(format);
+
+ unsigned dst_pitch;
+ unsigned dst_offset = 0;
+ if (copy_info->imageExtent.height == 1) {
+ /* Can't find this in the spec, but not having it is sort of insane? */
+ assert(dst_va % vk_format_get_blocksize(format) == 0);
+
+ dst_offset = (dst_va & 63) / vk_format_get_blocksize(format);
+ dst_va &= ~63;
+
+ dst_pitch = align((dst_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64);
+ } else {
+ unsigned dst_pixel_stride = copy_info->bufferRowLength
+ ?
copy_info->bufferRowLength + : copy_info->imageExtent.width; + dst_pitch = dst_pixel_stride * vk_format_get_blocksize(format); + assert(!(dst_pitch & 63)); + assert(!(dst_va & 63)); + } + + + tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48); + + /* + * Emit source: + */ + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13); + tu_cs_emit(&cmdbuf->cs, + A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) | + A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) | + A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000); + tu_cs_emit(&cmdbuf->cs, + A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) | + A6XX_SP_PS_2D_SRC_SIZE_HEIGHT( + src_image->extent.height)); /* SP_PS_2D_SRC_SIZE */ + tu_cs_emit_qw(&cmdbuf->cs, src_va); + tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch)); + + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + + /* + * Emit destination: + */ + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9); + tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) | + A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); + tu_cs_emit_qw(&cmdbuf->cs, dst_va); + tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch)); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4); + tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->imageOffset.x)); + tu_cs_emit(&cmdbuf->cs, + A6XX_GRAS_2D_SRC_BR_X_X(copy_info->imageOffset.x + + copy_info->imageExtent.width - 1)); + tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->imageOffset.y)); + tu_cs_emit(&cmdbuf->cs, + A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageOffset.y + + copy_info->imageExtent.height - 1)); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2); + tu_cs_emit(&cmdbuf->cs, + A6XX_GRAS_2D_DST_TL_X(dst_offset) | A6XX_GRAS_2D_DST_TL_Y(0)); + tu_cs_emit(&cmdbuf->cs, + A6XX_GRAS_2D_DST_BR_X(dst_offset + copy_info->imageExtent.width - 1) | + A6XX_GRAS_2D_DST_BR_Y(copy_info->imageExtent.height - 1)); + + tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); + tu_cs_emit(&cmdbuf->cs, 0x3f); + tu_cs_emit_wfi(&cmdbuf->cs); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1); + tu_cs_emit(&cmdbuf->cs, 0); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1); + tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format)); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); + tu_cs_emit(&cmdbuf->cs, 0x01000000); + + tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1); + tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); + + tu_cs_emit_wfi(&cmdbuf->cs); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); + tu_cs_emit(&cmdbuf->cs, 0); +} + +static void +tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf, + struct tu_image *src_image, + struct tu_buffer *dst_buffer, + const VkBufferImageCopy *copy_info) +{ + tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ); + tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE); + + /* general setup */ + tu_dma_prepare(cmdbuf); + + tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6); + + /* buffer copy setup */ 
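+ /*
+ * CP_SET_MARKER with RM6_BLIT2DSCALE seems to tag the state that
+ * follows as belonging to the 2D blit engine path rather than 3D
+ * rendering; this mirrors how the other copy paths in this file use
+ * it (an inference from usage, not documented behavior).
+ */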
+ tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
+ tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
+
+ VkFormat format = src_image->vk_format;
+ const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
+
+ unsigned dst_pixel_stride = copy_info->bufferRowLength
+ ? copy_info->bufferRowLength
+ : copy_info->imageExtent.width;
+ unsigned cpp = vk_format_get_blocksize(format);
+ unsigned dst_pitch = dst_pixel_stride * cpp;
+
+ const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
+
+ tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
+ tu_cs_emit(&cmdbuf->cs, blit_cntl);
+
+ tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
+ tu_cs_emit(&cmdbuf->cs, blit_cntl);
+
+ for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) {
+ unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset;
+ uint64_t dst_va = dst_buffer->bo->iova + dst_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * dst_pitch;
+
+ if ((dst_pitch & 63) || (dst_va & 63)) {
+ /* Do a per line copy */
+ VkBufferImageCopy line_copy_info = *copy_info;
+ line_copy_info.imageExtent.height = 1;
+ for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) {
+ /*
+ * If dst_va is not aligned, the line copy will need to adjust. Give
+ * it room to do so.
+ */
+ unsigned max_width = 16384 - ((dst_va & 0x3f) ? 64 : 0);
+ line_copy_info.imageOffset.x = copy_info->imageOffset.x;
+ line_copy_info.imageExtent.width = copy_info->imageExtent.width;
+
+ for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) {
+ tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, &line_copy_info, format, layer, dst_va + c * cpp);
+
+ line_copy_info.imageOffset.x += max_width;
+ line_copy_info.imageExtent.width -= max_width;
+ }
+
+ line_copy_info.imageOffset.y++;
+ dst_va += dst_pitch;
+ }
+ } else {
+ tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, copy_info, format, layer, dst_va);
+ }
+ }
+
+ tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
+
+ tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
+ tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
+ tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
+}
+
+void
+tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
+ VkBuffer srcBuffer,
+ VkBuffer destBuffer,
+ uint32_t regionCount,
+ const VkBufferCopy *pRegions)
+{
+ TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
+ TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
+ TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
+
+ for (unsigned i = 0; i < regionCount; ++i) {
+ uint64_t src_offset = src_buffer->bo_offset + pRegions[i].srcOffset;
+ uint64_t dst_offset = dst_buffer->bo_offset + pRegions[i].dstOffset;
+
+ tu_copy_buffer(cmdbuf, src_buffer->bo, src_offset, dst_buffer->bo,
+ dst_offset, pRegions[i].size);
+ }
+}
+
+void
+tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
+ VkBuffer srcBuffer,
+ VkImage destImage,
+ VkImageLayout destImageLayout,
+ uint32_t regionCount,
+ const VkBufferImageCopy *pRegions)
+{
+ TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
+ TU_FROM_HANDLE(tu_image, dest_image, destImage);
+ TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
+
+ for (unsigned i = 0; i < regionCount; ++i) {
+ tu_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image,
+ pRegions + i);
+ }
+}
+
+void
+tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
+ VkImage srcImage,
+ VkImageLayout srcImageLayout,
+ VkBuffer
destBuffer, + uint32_t regionCount, + const VkBufferImageCopy *pRegions) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + TU_FROM_HANDLE(tu_image, src_image, srcImage); + TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer); + + for (unsigned i = 0; i < regionCount; ++i) { + tu_copy_image_to_buffer(cmd_buffer, src_image, dst_buffer, + pRegions + i); + } +} + +static void +meta_copy_image(struct tu_cmd_buffer *cmd_buffer, + struct tu_image *src_image, + VkImageLayout src_image_layout, + struct tu_image *dest_image, + VkImageLayout dest_image_layout, + uint32_t regionCount, + const VkImageCopy *pRegions) +{ +} + +void +tu_CmdCopyImage(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy *pRegions) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + TU_FROM_HANDLE(tu_image, src_image, srcImage); + TU_FROM_HANDLE(tu_image, dest_image, destImage); + + meta_copy_image(cmd_buffer, src_image, srcImageLayout, dest_image, + destImageLayout, regionCount, pRegions); +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_meta_resolve.c b/lib/mesa/src/freedreno/vulkan/tu_meta_resolve.c new file mode 100644 index 000000000..4a9ebedfd --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_meta_resolve.c @@ -0,0 +1,41 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "tu_private.h" + +#include <assert.h> +#include <stdbool.h> + +#include "nir/nir_builder.h" +#include "vk_format.h" + +void +tu_CmdResolveImage(VkCommandBuffer cmd_buffer_h, + VkImage src_image_h, + VkImageLayout src_image_layout, + VkImage dest_image_h, + VkImageLayout dest_image_layout, + uint32_t region_count, + const VkImageResolve *regions) +{ +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_pass.c b/lib/mesa/src/freedreno/vulkan/tu_pass.c new file mode 100644 index 000000000..e3d9f23df --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_pass.c @@ -0,0 +1,416 @@ +/* + * Copyright © 2016 Red Hat. 
+ * Copyright © 2016 Bas Nieuwenhuizen + * + * based in part on anv driver which is: + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include "tu_private.h" + +#include "vk_util.h" + +VkResult +tu_CreateRenderPass(VkDevice _device, + const VkRenderPassCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkRenderPass *pRenderPass) +{ + TU_FROM_HANDLE(tu_device, device, _device); + struct tu_render_pass *pass; + size_t size; + size_t attachments_offset; + VkRenderPassMultiviewCreateInfo *multiview_info = NULL; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); + + size = sizeof(*pass); + size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); + attachments_offset = size; + size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); + + pass = vk_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + memset(pass, 0, size); + pass->attachment_count = pCreateInfo->attachmentCount; + pass->subpass_count = pCreateInfo->subpassCount; + pass->attachments = (void *) pass + attachments_offset; + + vk_foreach_struct(ext, pCreateInfo->pNext) + { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO: + multiview_info = (VkRenderPassMultiviewCreateInfo *) ext; + break; + default: + break; + } + } + + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + struct tu_render_pass_attachment *att = &pass->attachments[i]; + + att->format = pCreateInfo->pAttachments[i].format; + att->samples = pCreateInfo->pAttachments[i].samples; + att->load_op = pCreateInfo->pAttachments[i].loadOp; + att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; + att->initial_layout = pCreateInfo->pAttachments[i].initialLayout; + att->final_layout = pCreateInfo->pAttachments[i].finalLayout; + // att->store_op = pCreateInfo->pAttachments[i].storeOp; + // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + } + uint32_t subpass_attachment_count = 0; + struct tu_subpass_attachment *p; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; + + subpass_attachment_count += + desc->inputAttachmentCount + desc->colorAttachmentCount + + (desc->pResolveAttachments ? 
desc->colorAttachmentCount : 0) + + (desc->pDepthStencilAttachment != NULL); + } + + if (subpass_attachment_count) { + pass->subpass_attachments = vk_alloc2( + &device->alloc, pAllocator, + subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass->subpass_attachments == NULL) { + vk_free2(&device->alloc, pAllocator, pass); + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + } + } else + pass->subpass_attachments = NULL; + + p = pass->subpass_attachments; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; + uint32_t color_sample_count = 1, depth_sample_count = 1; + struct tu_subpass *subpass = &pass->subpasses[i]; + + subpass->input_count = desc->inputAttachmentCount; + subpass->color_count = desc->colorAttachmentCount; + if (multiview_info) + subpass->view_mask = multiview_info->pViewMasks[i]; + + if (desc->inputAttachmentCount > 0) { + subpass->input_attachments = p; + p += desc->inputAttachmentCount; + + for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { + subpass->input_attachments[j] = (struct tu_subpass_attachment) { + .attachment = desc->pInputAttachments[j].attachment, + .layout = desc->pInputAttachments[j].layout, + }; + if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED) + pass->attachments[desc->pInputAttachments[j].attachment] + .view_mask |= subpass->view_mask; + } + } + + if (desc->colorAttachmentCount > 0) { + subpass->color_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + subpass->color_attachments[j] = (struct tu_subpass_attachment) { + .attachment = desc->pColorAttachments[j].attachment, + .layout = desc->pColorAttachments[j].layout, + }; + if (desc->pColorAttachments[j].attachment != + VK_ATTACHMENT_UNUSED) { + pass->attachments[desc->pColorAttachments[j].attachment] + .view_mask |= subpass->view_mask; + color_sample_count = + pCreateInfo + ->pAttachments[desc->pColorAttachments[j].attachment] + .samples; + } + } + } + + subpass->has_resolve = false; + if (desc->pResolveAttachments) { + subpass->resolve_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + uint32_t a = desc->pResolveAttachments[j].attachment; + subpass->resolve_attachments[j] = (struct tu_subpass_attachment) { + .attachment = desc->pResolveAttachments[j].attachment, + .layout = desc->pResolveAttachments[j].layout, + }; + if (a != VK_ATTACHMENT_UNUSED) { + subpass->has_resolve = true; + pass->attachments[desc->pResolveAttachments[j].attachment] + .view_mask |= subpass->view_mask; + } + } + } + + if (desc->pDepthStencilAttachment) { + subpass->depth_stencil_attachment = (struct tu_subpass_attachment) { + .attachment = desc->pDepthStencilAttachment->attachment, + .layout = desc->pDepthStencilAttachment->layout, + }; + if (desc->pDepthStencilAttachment->attachment != + VK_ATTACHMENT_UNUSED) { + pass->attachments[desc->pDepthStencilAttachment->attachment] + .view_mask |= subpass->view_mask; + depth_sample_count = + pCreateInfo + ->pAttachments[desc->pDepthStencilAttachment->attachment] + .samples; + } + } else { + subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED; + } + + subpass->max_sample_count = + MAX2(color_sample_count, depth_sample_count); + } + + for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) { + uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass; + if (dst == 
VK_SUBPASS_EXTERNAL) { + pass->end_barrier.src_stage_mask = + pCreateInfo->pDependencies[i].srcStageMask; + pass->end_barrier.src_access_mask = + pCreateInfo->pDependencies[i].srcAccessMask; + pass->end_barrier.dst_access_mask = + pCreateInfo->pDependencies[i].dstAccessMask; + } else { + pass->subpasses[dst].start_barrier.src_stage_mask = + pCreateInfo->pDependencies[i].srcStageMask; + pass->subpasses[dst].start_barrier.src_access_mask = + pCreateInfo->pDependencies[i].srcAccessMask; + pass->subpasses[dst].start_barrier.dst_access_mask = + pCreateInfo->pDependencies[i].dstAccessMask; + } + } + + *pRenderPass = tu_render_pass_to_handle(pass); + + return VK_SUCCESS; +} + +VkResult +tu_CreateRenderPass2KHR(VkDevice _device, + const VkRenderPassCreateInfo2KHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkRenderPass *pRenderPass) +{ + TU_FROM_HANDLE(tu_device, device, _device); + struct tu_render_pass *pass; + size_t size; + size_t attachments_offset; + + assert(pCreateInfo->sType == + VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR); + + size = sizeof(*pass); + size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); + attachments_offset = size; + size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); + + pass = vk_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + memset(pass, 0, size); + pass->attachment_count = pCreateInfo->attachmentCount; + pass->subpass_count = pCreateInfo->subpassCount; + pass->attachments = (void *) pass + attachments_offset; + + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + struct tu_render_pass_attachment *att = &pass->attachments[i]; + + att->format = pCreateInfo->pAttachments[i].format; + att->samples = pCreateInfo->pAttachments[i].samples; + att->load_op = pCreateInfo->pAttachments[i].loadOp; + att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; + att->initial_layout = pCreateInfo->pAttachments[i].initialLayout; + att->final_layout = pCreateInfo->pAttachments[i].finalLayout; + // att->store_op = pCreateInfo->pAttachments[i].storeOp; + // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + } + uint32_t subpass_attachment_count = 0; + struct tu_subpass_attachment *p; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i]; + + subpass_attachment_count += + desc->inputAttachmentCount + desc->colorAttachmentCount + + (desc->pResolveAttachments ? 
desc->colorAttachmentCount : 0) + + (desc->pDepthStencilAttachment != NULL); + } + + if (subpass_attachment_count) { + pass->subpass_attachments = vk_alloc2( + &device->alloc, pAllocator, + subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass->subpass_attachments == NULL) { + vk_free2(&device->alloc, pAllocator, pass); + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + } + } else + pass->subpass_attachments = NULL; + + p = pass->subpass_attachments; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i]; + uint32_t color_sample_count = 1, depth_sample_count = 1; + struct tu_subpass *subpass = &pass->subpasses[i]; + + subpass->input_count = desc->inputAttachmentCount; + subpass->color_count = desc->colorAttachmentCount; + subpass->view_mask = desc->viewMask; + + if (desc->inputAttachmentCount > 0) { + subpass->input_attachments = p; + p += desc->inputAttachmentCount; + + for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { + subpass->input_attachments[j] = (struct tu_subpass_attachment) { + .attachment = desc->pInputAttachments[j].attachment, + .layout = desc->pInputAttachments[j].layout, + }; + if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED) + pass->attachments[desc->pInputAttachments[j].attachment] + .view_mask |= subpass->view_mask; + } + } + + if (desc->colorAttachmentCount > 0) { + subpass->color_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + subpass->color_attachments[j] = (struct tu_subpass_attachment) { + .attachment = desc->pColorAttachments[j].attachment, + .layout = desc->pColorAttachments[j].layout, + }; + if (desc->pColorAttachments[j].attachment != + VK_ATTACHMENT_UNUSED) { + pass->attachments[desc->pColorAttachments[j].attachment] + .view_mask |= subpass->view_mask; + color_sample_count = + pCreateInfo + ->pAttachments[desc->pColorAttachments[j].attachment] + .samples; + } + } + } + + subpass->has_resolve = false; + if (desc->pResolveAttachments) { + subpass->resolve_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + uint32_t a = desc->pResolveAttachments[j].attachment; + subpass->resolve_attachments[j] = (struct tu_subpass_attachment) { + .attachment = desc->pResolveAttachments[j].attachment, + .layout = desc->pResolveAttachments[j].layout, + }; + if (a != VK_ATTACHMENT_UNUSED) { + subpass->has_resolve = true; + pass->attachments[desc->pResolveAttachments[j].attachment] + .view_mask |= subpass->view_mask; + } + } + } + + if (desc->pDepthStencilAttachment) { + subpass->depth_stencil_attachment = (struct tu_subpass_attachment) { + .attachment = desc->pDepthStencilAttachment->attachment, + .layout = desc->pDepthStencilAttachment->layout, + }; + if (desc->pDepthStencilAttachment->attachment != + VK_ATTACHMENT_UNUSED) { + pass->attachments[desc->pDepthStencilAttachment->attachment] + .view_mask |= subpass->view_mask; + depth_sample_count = + pCreateInfo + ->pAttachments[desc->pDepthStencilAttachment->attachment] + .samples; + } + } else { + subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED; + } + + subpass->max_sample_count = + MAX2(color_sample_count, depth_sample_count); + } + + for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) { + uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass; + if (dst == VK_SUBPASS_EXTERNAL) { + 
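/* A dependency whose destination is VK_SUBPASS_EXTERNAL orders work
+ * after the entire pass, so it is folded into the pass-wide end
+ * barrier; any other dependency becomes the destination subpass's
+ * start barrier, matching the VkRenderPassCreateInfo path above. */
+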
pass->end_barrier.src_stage_mask = + pCreateInfo->pDependencies[i].srcStageMask; + pass->end_barrier.src_access_mask = + pCreateInfo->pDependencies[i].srcAccessMask; + pass->end_barrier.dst_access_mask = + pCreateInfo->pDependencies[i].dstAccessMask; + } else { + pass->subpasses[dst].start_barrier.src_stage_mask = + pCreateInfo->pDependencies[i].srcStageMask; + pass->subpasses[dst].start_barrier.src_access_mask = + pCreateInfo->pDependencies[i].srcAccessMask; + pass->subpasses[dst].start_barrier.dst_access_mask = + pCreateInfo->pDependencies[i].dstAccessMask; + } + } + + *pRenderPass = tu_render_pass_to_handle(pass); + + return VK_SUCCESS; +} + +void +tu_DestroyRenderPass(VkDevice _device, + VkRenderPass _pass, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_render_pass, pass, _pass); + + if (!_pass) + return; + vk_free2(&device->alloc, pAllocator, pass->subpass_attachments); + vk_free2(&device->alloc, pAllocator, pass); +} + +void +tu_GetRenderAreaGranularity(VkDevice _device, + VkRenderPass renderPass, + VkExtent2D *pGranularity) +{ + TU_FROM_HANDLE(tu_device, device, _device); + + pGranularity->width = device->physical_device->tile_align_w; + pGranularity->height = device->physical_device->tile_align_h; +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_pipeline.c b/lib/mesa/src/freedreno/vulkan/tu_pipeline.c new file mode 100644 index 000000000..9964020a8 --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_pipeline.c @@ -0,0 +1,1896 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * based in part on anv driver which is: + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "tu_private.h"
+
+#include "main/menums.h"
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "spirv/nir_spirv.h"
+#include "util/debug.h"
+#include "util/mesa-sha1.h"
+#include "util/u_atomic.h"
+#include "vk_format.h"
+#include "vk_util.h"
+
+#include "tu_cs.h"
+
+struct tu_pipeline_builder
+{
+ struct tu_device *device;
+ struct tu_pipeline_cache *cache;
+ const VkAllocationCallbacks *alloc;
+ const VkGraphicsPipelineCreateInfo *create_info;
+
+ struct tu_shader *shaders[MESA_SHADER_STAGES];
+ uint32_t shader_offsets[MESA_SHADER_STAGES];
+ uint32_t binning_vs_offset;
+ uint32_t shader_total_size;
+
+ bool rasterizer_discard;
+ /* these states are affected by rasterizer_discard */
+ VkSampleCountFlagBits samples;
+ bool use_depth_stencil_attachment;
+ bool use_color_attachments;
+ uint32_t color_attachment_count;
+ VkFormat color_attachment_formats[MAX_RTS];
+};
+
+static enum tu_dynamic_state_bits
+tu_dynamic_state_bit(VkDynamicState state)
+{
+ switch (state) {
+ case VK_DYNAMIC_STATE_VIEWPORT:
+ return TU_DYNAMIC_VIEWPORT;
+ case VK_DYNAMIC_STATE_SCISSOR:
+ return TU_DYNAMIC_SCISSOR;
+ case VK_DYNAMIC_STATE_LINE_WIDTH:
+ return TU_DYNAMIC_LINE_WIDTH;
+ case VK_DYNAMIC_STATE_DEPTH_BIAS:
+ return TU_DYNAMIC_DEPTH_BIAS;
+ case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
+ return TU_DYNAMIC_BLEND_CONSTANTS;
+ case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
+ return TU_DYNAMIC_DEPTH_BOUNDS;
+ case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
+ return TU_DYNAMIC_STENCIL_COMPARE_MASK;
+ case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
+ return TU_DYNAMIC_STENCIL_WRITE_MASK;
+ case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
+ return TU_DYNAMIC_STENCIL_REFERENCE;
+ default:
+ unreachable("invalid dynamic state");
+ return 0;
+ }
+}
+
+static gl_shader_stage
+tu_shader_stage(VkShaderStageFlagBits stage)
+{
+ switch (stage) {
+ case VK_SHADER_STAGE_VERTEX_BIT:
+ return MESA_SHADER_VERTEX;
+ case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+ return MESA_SHADER_TESS_CTRL;
+ case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+ return MESA_SHADER_TESS_EVAL;
+ case VK_SHADER_STAGE_GEOMETRY_BIT:
+ return MESA_SHADER_GEOMETRY;
+ case VK_SHADER_STAGE_FRAGMENT_BIT:
+ return MESA_SHADER_FRAGMENT;
+ case VK_SHADER_STAGE_COMPUTE_BIT:
+ return MESA_SHADER_COMPUTE;
+ default:
+ unreachable("invalid VkShaderStageFlagBits");
+ return MESA_SHADER_NONE;
+ }
+}
+
+static const VkVertexInputAttributeDescription *
+tu_find_vertex_input_attribute(
+ const VkPipelineVertexInputStateCreateInfo *vi_info, uint32_t slot)
+{
+ assert(slot >= VERT_ATTRIB_GENERIC0);
+ slot -= VERT_ATTRIB_GENERIC0;
+ for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
+ if (vi_info->pVertexAttributeDescriptions[i].location == slot)
+ return &vi_info->pVertexAttributeDescriptions[i];
+ }
+ return NULL;
+}
+
+static const VkVertexInputBindingDescription *
+tu_find_vertex_input_binding(
+ const VkPipelineVertexInputStateCreateInfo *vi_info,
+ const VkVertexInputAttributeDescription *vi_attr)
+{
+ assert(vi_attr);
+ for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
+ if (vi_info->pVertexBindingDescriptions[i].binding == vi_attr->binding)
+ return &vi_info->pVertexBindingDescriptions[i];
+ }
+ return NULL;
+}
+
+static bool
+tu_logic_op_reads_dst(VkLogicOp op)
+{
+ switch (op) {
+ case VK_LOGIC_OP_CLEAR:
+ case VK_LOGIC_OP_COPY:
+ case VK_LOGIC_OP_COPY_INVERTED:
+ case VK_LOGIC_OP_SET:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static VkBlendFactor
+tu_blend_factor_no_dst_alpha(VkBlendFactor factor)
+{
+ /* treat
dst alpha as 1.0 and avoid reading it */
+ switch (factor) {
+ case VK_BLEND_FACTOR_DST_ALPHA:
+ return VK_BLEND_FACTOR_ONE;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+ return VK_BLEND_FACTOR_ZERO;
+ default:
+ return factor;
+ }
+}
+
+static enum pc_di_primtype
+tu6_primtype(VkPrimitiveTopology topology)
+{
+ switch (topology) {
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+ return DI_PT_POINTLIST;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+ return DI_PT_LINELIST;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+ return DI_PT_LINESTRIP;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+ return DI_PT_TRILIST;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+ return DI_PT_TRISTRIP;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+ return DI_PT_TRIFAN;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ return DI_PT_LINE_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ return DI_PT_LINESTRIP_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ return DI_PT_TRI_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ return DI_PT_TRISTRIP_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
+ default:
+ unreachable("invalid primitive topology");
+ return DI_PT_NONE;
+ }
+}
+
+static enum adreno_compare_func
+tu6_compare_func(VkCompareOp op)
+{
+ switch (op) {
+ case VK_COMPARE_OP_NEVER:
+ return FUNC_NEVER;
+ case VK_COMPARE_OP_LESS:
+ return FUNC_LESS;
+ case VK_COMPARE_OP_EQUAL:
+ return FUNC_EQUAL;
+ case VK_COMPARE_OP_LESS_OR_EQUAL:
+ return FUNC_LEQUAL;
+ case VK_COMPARE_OP_GREATER:
+ return FUNC_GREATER;
+ case VK_COMPARE_OP_NOT_EQUAL:
+ return FUNC_NOTEQUAL;
+ case VK_COMPARE_OP_GREATER_OR_EQUAL:
+ return FUNC_GEQUAL;
+ case VK_COMPARE_OP_ALWAYS:
+ return FUNC_ALWAYS;
+ default:
+ unreachable("invalid VkCompareOp");
+ return FUNC_NEVER;
+ }
+}
+
+static enum adreno_stencil_op
+tu6_stencil_op(VkStencilOp op)
+{
+ switch (op) {
+ case VK_STENCIL_OP_KEEP:
+ return STENCIL_KEEP;
+ case VK_STENCIL_OP_ZERO:
+ return STENCIL_ZERO;
+ case VK_STENCIL_OP_REPLACE:
+ return STENCIL_REPLACE;
+ case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
+ return STENCIL_INCR_CLAMP;
+ case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
+ return STENCIL_DECR_CLAMP;
+ case VK_STENCIL_OP_INVERT:
+ return STENCIL_INVERT;
+ case VK_STENCIL_OP_INCREMENT_AND_WRAP:
+ return STENCIL_INCR_WRAP;
+ case VK_STENCIL_OP_DECREMENT_AND_WRAP:
+ return STENCIL_DECR_WRAP;
+ default:
+ unreachable("invalid VkStencilOp");
+ return STENCIL_KEEP;
+ }
+}
+
+static enum a3xx_rop_code
+tu6_rop(VkLogicOp op)
+{
+ switch (op) {
+ case VK_LOGIC_OP_CLEAR:
+ return ROP_CLEAR;
+ case VK_LOGIC_OP_AND:
+ return ROP_AND;
+ case VK_LOGIC_OP_AND_REVERSE:
+ return ROP_AND_REVERSE;
+ case VK_LOGIC_OP_COPY:
+ return ROP_COPY;
+ case VK_LOGIC_OP_AND_INVERTED:
+ return ROP_AND_INVERTED;
+ case VK_LOGIC_OP_NO_OP:
+ return ROP_NOOP;
+ case VK_LOGIC_OP_XOR:
+ return ROP_XOR;
+ case VK_LOGIC_OP_OR:
+ return ROP_OR;
+ case VK_LOGIC_OP_NOR:
+ return ROP_NOR;
+ case VK_LOGIC_OP_EQUIVALENT:
+ return ROP_EQUIV;
+ case VK_LOGIC_OP_INVERT:
+ return ROP_INVERT;
+ case VK_LOGIC_OP_OR_REVERSE:
+ return ROP_OR_REVERSE;
+ case VK_LOGIC_OP_COPY_INVERTED:
+ return ROP_COPY_INVERTED;
+ case VK_LOGIC_OP_OR_INVERTED:
+ return ROP_OR_INVERTED;
+ case VK_LOGIC_OP_NAND:
+ return ROP_NAND;
+ case VK_LOGIC_OP_SET:
+ return ROP_SET;
+ default:
+ unreachable("invalid VkLogicOp");
+ return ROP_NOOP;
+ }
+}
+
+static enum adreno_rb_blend_factor
+tu6_blend_factor(VkBlendFactor factor)
+{
+ switch (factor) {
+ case VK_BLEND_FACTOR_ZERO:
+ return FACTOR_ZERO;
+ case VK_BLEND_FACTOR_ONE:
+ return
FACTOR_ONE; + case VK_BLEND_FACTOR_SRC_COLOR: + return FACTOR_SRC_COLOR; + case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: + return FACTOR_ONE_MINUS_SRC_COLOR; + case VK_BLEND_FACTOR_DST_COLOR: + return FACTOR_DST_COLOR; + case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: + return FACTOR_ONE_MINUS_DST_COLOR; + case VK_BLEND_FACTOR_SRC_ALPHA: + return FACTOR_SRC_ALPHA; + case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: + return FACTOR_ONE_MINUS_SRC_ALPHA; + case VK_BLEND_FACTOR_DST_ALPHA: + return FACTOR_DST_ALPHA; + case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: + return FACTOR_ONE_MINUS_DST_ALPHA; + case VK_BLEND_FACTOR_CONSTANT_COLOR: + return FACTOR_CONSTANT_COLOR; + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: + return FACTOR_ONE_MINUS_CONSTANT_COLOR; + case VK_BLEND_FACTOR_CONSTANT_ALPHA: + return FACTOR_CONSTANT_ALPHA; + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: + return FACTOR_ONE_MINUS_CONSTANT_ALPHA; + case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: + return FACTOR_SRC_ALPHA_SATURATE; + case VK_BLEND_FACTOR_SRC1_COLOR: + return FACTOR_SRC1_COLOR; + case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: + return FACTOR_ONE_MINUS_SRC1_COLOR; + case VK_BLEND_FACTOR_SRC1_ALPHA: + return FACTOR_SRC1_ALPHA; + case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: + return FACTOR_ONE_MINUS_SRC1_ALPHA; + default: + unreachable("invalid VkBlendFactor"); + return FACTOR_ZERO; + } +} + +static enum a3xx_rb_blend_opcode +tu6_blend_op(VkBlendOp op) +{ + switch (op) { + case VK_BLEND_OP_ADD: + return BLEND_DST_PLUS_SRC; + case VK_BLEND_OP_SUBTRACT: + return BLEND_SRC_MINUS_DST; + case VK_BLEND_OP_REVERSE_SUBTRACT: + return BLEND_DST_MINUS_SRC; + case VK_BLEND_OP_MIN: + return BLEND_MIN_DST_SRC; + case VK_BLEND_OP_MAX: + return BLEND_MAX_DST_SRC; + default: + unreachable("invalid VkBlendOp"); + return BLEND_DST_PLUS_SRC; + } +} + +static void +tu6_emit_vs_config(struct tu_cs *cs, const struct ir3_shader_variant *vs) +{ + uint32_t sp_vs_ctrl = + A6XX_SP_VS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | + A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) | + A6XX_SP_VS_CTRL_REG0_MERGEDREGS | + A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack); + if (vs->num_samp) + sp_vs_ctrl |= A6XX_SP_VS_CTRL_REG0_PIXLODENABLE; + + uint32_t sp_vs_config = A6XX_SP_VS_CONFIG_NTEX(vs->num_samp) | + A6XX_SP_VS_CONFIG_NSAMP(vs->num_samp); + if (vs->instrlen) + sp_vs_config |= A6XX_SP_VS_CONFIG_ENABLED; + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_CTRL_REG0, 1); + tu_cs_emit(cs, sp_vs_ctrl); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_CONFIG, 2); + tu_cs_emit(cs, sp_vs_config); + tu_cs_emit(cs, vs->instrlen); + + tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_VS_CNTL, 1); + tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(align(vs->constlen, 4)) | 0x100); +} + +static void +tu6_emit_hs_config(struct tu_cs *cs, const struct ir3_shader_variant *hs) +{ + uint32_t sp_hs_config = 0; + if (hs->instrlen) + sp_hs_config |= A6XX_SP_HS_CONFIG_ENABLED; + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_UNKNOWN_A831, 1); + tu_cs_emit(cs, 0); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_CONFIG, 2); + tu_cs_emit(cs, sp_hs_config); + tu_cs_emit(cs, hs->instrlen); + + tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_HS_CNTL, 1); + tu_cs_emit(cs, A6XX_HLSQ_HS_CNTL_CONSTLEN(align(hs->constlen, 4))); +} + +static void +tu6_emit_ds_config(struct tu_cs *cs, const struct ir3_shader_variant *ds) +{ + uint32_t sp_ds_config = 0; + if (ds->instrlen) + sp_ds_config |= A6XX_SP_DS_CONFIG_ENABLED; + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_DS_CONFIG, 2); + tu_cs_emit(cs, sp_ds_config); + tu_cs_emit(cs, ds->instrlen); + + tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_DS_CNTL, 1); 
+ tu_cs_emit(cs, A6XX_HLSQ_DS_CNTL_CONSTLEN(align(ds->constlen, 4))); +} + +static void +tu6_emit_gs_config(struct tu_cs *cs, const struct ir3_shader_variant *gs) +{ + uint32_t sp_gs_config = 0; + if (gs->instrlen) + sp_gs_config |= A6XX_SP_GS_CONFIG_ENABLED; + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_UNKNOWN_A871, 1); + tu_cs_emit(cs, 0); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_CONFIG, 2); + tu_cs_emit(cs, sp_gs_config); + tu_cs_emit(cs, gs->instrlen); + + tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_GS_CNTL, 1); + tu_cs_emit(cs, A6XX_HLSQ_GS_CNTL_CONSTLEN(align(gs->constlen, 4))); +} + +static void +tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs) +{ + uint32_t sp_fs_ctrl = + A6XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | 0x1000000 | + A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fs->info.max_reg + 1) | + A6XX_SP_FS_CTRL_REG0_MERGEDREGS | + A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(fs->branchstack); + if (fs->total_in > 0 || fs->frag_coord) + sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_VARYING; + if (fs->num_samp > 0) + sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_PIXLODENABLE; + + uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(fs->num_samp) | + A6XX_SP_FS_CONFIG_NSAMP(fs->num_samp); + if (fs->instrlen) + sp_fs_config |= A6XX_SP_FS_CONFIG_ENABLED; + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A99E, 1); + tu_cs_emit(cs, 0x7fc0); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A9A8, 1); + tu_cs_emit(cs, 0); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_AB00, 1); + tu_cs_emit(cs, 0x5); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_CTRL_REG0, 1); + tu_cs_emit(cs, sp_fs_ctrl); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_CONFIG, 2); + tu_cs_emit(cs, sp_fs_config); + tu_cs_emit(cs, fs->instrlen); + + tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_FS_CNTL, 1); + tu_cs_emit(cs, A6XX_HLSQ_FS_CNTL_CONSTLEN(align(fs->constlen, 4)) | 0x100); +} + +static void +tu6_emit_vs_system_values(struct tu_cs *cs, + const struct ir3_shader_variant *vs) +{ + const uint32_t vertexid_regid = + ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE); + const uint32_t instanceid_regid = + ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID); + + tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_1, 6); + tu_cs_emit(cs, A6XX_VFD_CONTROL_1_REGID4VTX(vertexid_regid) | + A6XX_VFD_CONTROL_1_REGID4INST(instanceid_regid) | + 0xfcfc0000); + tu_cs_emit(cs, 0x0000fcfc); /* VFD_CONTROL_2 */ + tu_cs_emit(cs, 0xfcfcfcfc); /* VFD_CONTROL_3 */ + tu_cs_emit(cs, 0x000000fc); /* VFD_CONTROL_4 */ + tu_cs_emit(cs, 0x0000fcfc); /* VFD_CONTROL_5 */ + tu_cs_emit(cs, 0x00000000); /* VFD_CONTROL_6 */ +} + +static void +tu6_emit_vpc(struct tu_cs *cs, + const struct ir3_shader_variant *vs, + const struct ir3_shader_variant *fs, + bool binning_pass) +{ + struct ir3_shader_linkage linkage = { 0 }; + ir3_link_shaders(&linkage, vs, fs); + + if (vs->shader->stream_output.num_outputs && !binning_pass) + tu_finishme("stream output"); + + BITSET_DECLARE(vpc_var_enables, 128) = { 0 }; + for (uint32_t i = 0; i < linkage.cnt; i++) { + const uint32_t comp_count = util_last_bit(linkage.var[i].compmask); + for (uint32_t j = 0; j < comp_count; j++) + BITSET_SET(vpc_var_enables, linkage.var[i].loc + j); + } + + tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VAR_DISABLE(0), 4); + tu_cs_emit(cs, ~vpc_var_enables[0]); + tu_cs_emit(cs, ~vpc_var_enables[1]); + tu_cs_emit(cs, ~vpc_var_enables[2]); + tu_cs_emit(cs, ~vpc_var_enables[3]); + + /* a6xx finds position/pointsize at the end */ + const uint32_t position_regid = + ir3_find_output_regid(vs, VARYING_SLOT_POS); + const uint32_t pointsize_regid = + ir3_find_output_regid(vs, 
VARYING_SLOT_PSIZ); + uint32_t pointsize_loc = 0xff; + if (position_regid != regid(63, 0)) + ir3_link_add(&linkage, position_regid, 0xf, linkage.max_loc); + if (pointsize_regid != regid(63, 0)) { + pointsize_loc = linkage.max_loc; + ir3_link_add(&linkage, pointsize_regid, 0x1, linkage.max_loc); + } + + /* map vs outputs to VPC */ + assert(linkage.cnt <= 32); + const uint32_t sp_vs_out_count = (linkage.cnt + 1) / 2; + const uint32_t sp_vs_vpc_dst_count = (linkage.cnt + 3) / 4; + uint32_t sp_vs_out[16]; + uint32_t sp_vs_vpc_dst[8]; + sp_vs_out[sp_vs_out_count - 1] = 0; + sp_vs_vpc_dst[sp_vs_vpc_dst_count - 1] = 0; + for (uint32_t i = 0; i < linkage.cnt; i++) { + ((uint16_t *) sp_vs_out)[i] = + A6XX_SP_VS_OUT_REG_A_REGID(linkage.var[i].regid) | + A6XX_SP_VS_OUT_REG_A_COMPMASK(linkage.var[i].compmask); + ((uint8_t *) sp_vs_vpc_dst)[i] = + A6XX_SP_VS_VPC_DST_REG_OUTLOC0(linkage.var[i].loc); + } + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_OUT_REG(0), sp_vs_out_count); + tu_cs_emit_array(cs, sp_vs_out, sp_vs_out_count); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_VPC_DST_REG(0), sp_vs_vpc_dst_count); + tu_cs_emit_array(cs, sp_vs_vpc_dst, sp_vs_vpc_dst_count); + + tu_cs_emit_pkt4(cs, REG_A6XX_VPC_CNTL_0, 1); + tu_cs_emit(cs, A6XX_VPC_CNTL_0_NUMNONPOSVAR(fs->total_in) | + (fs->total_in > 0 ? A6XX_VPC_CNTL_0_VARYING : 0) | + 0xff00ff00); + + tu_cs_emit_pkt4(cs, REG_A6XX_VPC_PACK, 1); + tu_cs_emit(cs, A6XX_VPC_PACK_NUMNONPOSVAR(fs->total_in) | + A6XX_VPC_PACK_PSIZELOC(pointsize_loc) | + A6XX_VPC_PACK_STRIDE_IN_VPC(linkage.max_loc)); + + tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_SIV_CNTL, 1); + tu_cs_emit(cs, 0x0000ffff); /* XXX */ + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_PRIMITIVE_CNTL, 1); + tu_cs_emit(cs, A6XX_SP_PRIMITIVE_CNTL_VSOUT(linkage.cnt)); + + tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_1, 1); + tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC(linkage.max_loc) | + (vs->writes_psize ? 
A6XX_PC_PRIMITIVE_CNTL_1_PSIZE : 0)); +} + +static int +tu6_vpc_varying_mode(const struct ir3_shader_variant *fs, + uint32_t index, + uint8_t *interp_mode, + uint8_t *ps_repl_mode) +{ + enum + { + INTERP_SMOOTH = 0, + INTERP_FLAT = 1, + INTERP_ZERO = 2, + INTERP_ONE = 3, + }; + enum + { + PS_REPL_NONE = 0, + PS_REPL_S = 1, + PS_REPL_T = 2, + PS_REPL_ONE_MINUS_T = 3, + }; + + const uint32_t compmask = fs->inputs[index].compmask; + + /* NOTE: varyings are packed, so if compmask is 0xb then first, second, and + * fourth component occupy three consecutive varying slots + */ + int shift = 0; + *interp_mode = 0; + *ps_repl_mode = 0; + if (fs->inputs[index].slot == VARYING_SLOT_PNTC) { + if (compmask & 0x1) { + *ps_repl_mode |= PS_REPL_S << shift; + shift += 2; + } + if (compmask & 0x2) { + *ps_repl_mode |= PS_REPL_T << shift; + shift += 2; + } + if (compmask & 0x4) { + *interp_mode |= INTERP_ZERO << shift; + shift += 2; + } + if (compmask & 0x8) { + *interp_mode |= INTERP_ONE << 6; + shift += 2; + } + } else if ((fs->inputs[index].interpolate == INTERP_MODE_FLAT) || + fs->inputs[index].rasterflat) { + for (int i = 0; i < 4; i++) { + if (compmask & (1 << i)) { + *interp_mode |= INTERP_FLAT << shift; + shift += 2; + } + } + } + + return shift; +} + +static void +tu6_emit_vpc_varying_modes(struct tu_cs *cs, + const struct ir3_shader_variant *fs, + bool binning_pass) +{ + uint32_t interp_modes[8] = { 0 }; + uint32_t ps_repl_modes[8] = { 0 }; + + if (!binning_pass) { + for (int i = -1; + (i = ir3_next_varying(fs, i)) < (int) fs->inputs_count;) { + + /* get the mode for input i */ + uint8_t interp_mode; + uint8_t ps_repl_mode; + const int bits = + tu6_vpc_varying_mode(fs, i, &interp_mode, &ps_repl_mode); + + /* OR the mode into the array */ + const uint32_t inloc = fs->inputs[i].inloc * 2; + uint32_t n = inloc / 32; + uint32_t shift = inloc % 32; + interp_modes[n] |= interp_mode << shift; + ps_repl_modes[n] |= ps_repl_mode << shift; + if (shift + bits > 32) { + n++; + shift = 32 - shift; + + interp_modes[n] |= interp_mode >> shift; + ps_repl_modes[n] |= ps_repl_mode >> shift; + } + } + } + + tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8); + tu_cs_emit_array(cs, interp_modes, 8); + + tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8); + tu_cs_emit_array(cs, ps_repl_modes, 8); +} + +static void +tu6_emit_fs_system_values(struct tu_cs *cs, + const struct ir3_shader_variant *fs) +{ + const uint32_t frontfacing_regid = + ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRONT_FACE); + const uint32_t sampleid_regid = + ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_ID); + const uint32_t samplemaskin_regid = + ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_MASK_IN); + const uint32_t fragcoord_xy_regid = + ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_COORD); + const uint32_t fragcoord_zw_regid = (fragcoord_xy_regid != regid(63, 0)) + ? 
(fragcoord_xy_regid + 2) + : fragcoord_xy_regid; + const uint32_t varyingcoord_regid = + ir3_find_sysval_regid(fs, SYSTEM_VALUE_BARYCENTRIC_PIXEL); + + tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CONTROL_1_REG, 5); + tu_cs_emit(cs, 0x7); + tu_cs_emit(cs, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(frontfacing_regid) | + A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(sampleid_regid) | + A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(samplemaskin_regid) | + A6XX_HLSQ_CONTROL_2_REG_SIZE(regid(63, 0))); + tu_cs_emit(cs, + A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_PIXEL(varyingcoord_regid) | + A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_CENTROID(regid(63, 0)) | + 0xfc00fc00); + tu_cs_emit(cs, + A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(fragcoord_xy_regid) | + A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(fragcoord_zw_regid) | + A6XX_HLSQ_CONTROL_4_REG_BARY_IJ_PIXEL_PERSAMP(regid(63, 0)) | + 0x0000fc00); + tu_cs_emit(cs, 0xfc); +} + +static void +tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) +{ + tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UNKNOWN_B980, 1); + tu_cs_emit(cs, fs->total_in > 0 ? 3 : 1); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A982, 1); + tu_cs_emit(cs, 0); /* XXX */ + + tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1); + tu_cs_emit(cs, 0xff); /* XXX */ + + uint32_t gras_cntl = 0; + if (fs->total_in > 0) + gras_cntl |= A6XX_GRAS_CNTL_VARYING; + if (fs->frag_coord) { + gras_cntl |= A6XX_GRAS_CNTL_SIZE | A6XX_GRAS_CNTL_XCOORD | + A6XX_GRAS_CNTL_YCOORD | A6XX_GRAS_CNTL_ZCOORD | + A6XX_GRAS_CNTL_WCOORD; + } + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CNTL, 1); + tu_cs_emit(cs, gras_cntl); + + uint32_t rb_render_control = 0; + if (fs->total_in > 0) { + rb_render_control = + A6XX_RB_RENDER_CONTROL0_VARYING | A6XX_RB_RENDER_CONTROL0_UNK10; + } + if (fs->frag_coord) { + rb_render_control |= + A6XX_RB_RENDER_CONTROL0_SIZE | A6XX_RB_RENDER_CONTROL0_XCOORD | + A6XX_RB_RENDER_CONTROL0_YCOORD | A6XX_RB_RENDER_CONTROL0_ZCOORD | + A6XX_RB_RENDER_CONTROL0_WCOORD; + } + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_CONTROL0, 2); + tu_cs_emit(cs, rb_render_control); + tu_cs_emit(cs, (fs->frag_face ? A6XX_RB_RENDER_CONTROL1_FACENESS : 0)); +} + +static void +tu6_emit_fs_outputs(struct tu_cs *cs, + const struct ir3_shader_variant *fs, + uint32_t mrt_count) +{ + const uint32_t fragdepth_regid = + ir3_find_output_regid(fs, FRAG_RESULT_DEPTH); + uint32_t fragdata_regid[8]; + if (fs->color0_mrt) { + fragdata_regid[0] = ir3_find_output_regid(fs, FRAG_RESULT_COLOR); + for (uint32_t i = 1; i < ARRAY_SIZE(fragdata_regid); i++) + fragdata_regid[i] = fragdata_regid[0]; + } else { + for (uint32_t i = 0; i < ARRAY_SIZE(fragdata_regid); i++) + fragdata_regid[i] = ir3_find_output_regid(fs, FRAG_RESULT_DATA0 + i); + } + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2); + tu_cs_emit( + cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(fragdepth_regid) | 0xfcfc0000); + tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count)); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), 8); + for (uint32_t i = 0; i < ARRAY_SIZE(fragdata_regid); i++) { + // TODO we could have a mix of half and full precision outputs, + // we really need to figure out half-precision from IR3_REG_HALF + tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(fragdata_regid[i]) | + (false ? A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION : 0)); + } + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2); + tu_cs_emit(cs, fs->writes_pos ? 
A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z : 0); + tu_cs_emit(cs, A6XX_RB_FS_OUTPUT_CNTL1_MRT(mrt_count)); + + uint32_t gras_su_depth_plane_cntl = 0; + uint32_t rb_depth_plane_cntl = 0; + if (fs->no_earlyz | fs->writes_pos) { + gras_su_depth_plane_cntl |= A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z; + rb_depth_plane_cntl |= A6XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z; + } + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL, 1); + tu_cs_emit(cs, gras_su_depth_plane_cntl); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_PLANE_CNTL, 1); + tu_cs_emit(cs, rb_depth_plane_cntl); +} + +static void +tu6_emit_shader_object(struct tu_cs *cs, + gl_shader_stage stage, + const struct ir3_shader_variant *variant, + const struct tu_bo *binary_bo, + uint32_t binary_offset) +{ + uint16_t reg; + uint8_t opcode; + enum a6xx_state_block sb; + switch (stage) { + case MESA_SHADER_VERTEX: + reg = REG_A6XX_SP_VS_OBJ_START_LO; + opcode = CP_LOAD_STATE6_GEOM; + sb = SB6_VS_SHADER; + break; + case MESA_SHADER_TESS_CTRL: + reg = REG_A6XX_SP_HS_OBJ_START_LO; + opcode = CP_LOAD_STATE6_GEOM; + sb = SB6_HS_SHADER; + break; + case MESA_SHADER_TESS_EVAL: + reg = REG_A6XX_SP_DS_OBJ_START_LO; + opcode = CP_LOAD_STATE6_GEOM; + sb = SB6_DS_SHADER; + break; + case MESA_SHADER_GEOMETRY: + reg = REG_A6XX_SP_GS_OBJ_START_LO; + opcode = CP_LOAD_STATE6_GEOM; + sb = SB6_GS_SHADER; + break; + case MESA_SHADER_FRAGMENT: + reg = REG_A6XX_SP_FS_OBJ_START_LO; + opcode = CP_LOAD_STATE6_FRAG; + sb = SB6_FS_SHADER; + break; + case MESA_SHADER_COMPUTE: + reg = REG_A6XX_SP_CS_OBJ_START_LO; + opcode = CP_LOAD_STATE6_FRAG; + sb = SB6_CS_SHADER; + break; + default: + unreachable("invalid gl_shader_stage"); + opcode = CP_LOAD_STATE6_GEOM; + sb = SB6_VS_SHADER; + break; + } + + if (!variant->instrlen) { + tu_cs_emit_pkt4(cs, reg, 2); + tu_cs_emit_qw(cs, 0); + return; + } + + assert(variant->type == stage); + + const uint64_t binary_iova = binary_bo->iova + binary_offset; + assert((binary_iova & 0x3) == 0); + + tu_cs_emit_pkt4(cs, reg, 2); + tu_cs_emit_qw(cs, binary_iova); + + /* always indirect */ + const bool indirect = true; + if (indirect) { + tu_cs_emit_pkt7(cs, opcode, 3); + tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(sb) | + CP_LOAD_STATE6_0_NUM_UNIT(variant->instrlen)); + tu_cs_emit_qw(cs, binary_iova); + } else { + const void *binary = binary_bo->map + binary_offset; + + tu_cs_emit_pkt7(cs, opcode, 3 + variant->info.sizedwords); + tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(sb) | + CP_LOAD_STATE6_0_NUM_UNIT(variant->instrlen)); + tu_cs_emit_qw(cs, 0); + tu_cs_emit_array(cs, binary, variant->info.sizedwords); + } +} + +static void +tu6_emit_program(struct tu_cs *cs, + const struct tu_pipeline_builder *builder, + const struct tu_bo *binary_bo, + bool binning_pass) +{ + static const struct ir3_shader_variant dummy_variant = { + .type = MESA_SHADER_NONE + }; + assert(builder->shaders[MESA_SHADER_VERTEX]); + const struct ir3_shader_variant *vs = + &builder->shaders[MESA_SHADER_VERTEX]->variants[0]; + const struct ir3_shader_variant *hs = + builder->shaders[MESA_SHADER_TESS_CTRL] + ? &builder->shaders[MESA_SHADER_TESS_CTRL]->variants[0] + : &dummy_variant; + const struct ir3_shader_variant *ds = + builder->shaders[MESA_SHADER_TESS_EVAL] + ? 
&builder->shaders[MESA_SHADER_TESS_EVAL]->variants[0] + : &dummy_variant; + const struct ir3_shader_variant *gs = + builder->shaders[MESA_SHADER_GEOMETRY] + ? &builder->shaders[MESA_SHADER_GEOMETRY]->variants[0] + : &dummy_variant; + const struct ir3_shader_variant *fs = + builder->shaders[MESA_SHADER_FRAGMENT] + ? &builder->shaders[MESA_SHADER_FRAGMENT]->variants[0] + : &dummy_variant; + + if (binning_pass) { + vs = &builder->shaders[MESA_SHADER_VERTEX]->variants[1]; + fs = &dummy_variant; + } + + tu6_emit_vs_config(cs, vs); + tu6_emit_hs_config(cs, hs); + tu6_emit_ds_config(cs, ds); + tu6_emit_gs_config(cs, gs); + tu6_emit_fs_config(cs, fs); + + tu6_emit_vs_system_values(cs, vs); + tu6_emit_vpc(cs, vs, fs, binning_pass); + tu6_emit_vpc_varying_modes(cs, fs, binning_pass); + tu6_emit_fs_system_values(cs, fs); + tu6_emit_fs_inputs(cs, fs); + tu6_emit_fs_outputs(cs, fs, builder->color_attachment_count); + + tu6_emit_shader_object(cs, MESA_SHADER_VERTEX, vs, binary_bo, + builder->shader_offsets[MESA_SHADER_VERTEX]); + + tu6_emit_shader_object(cs, MESA_SHADER_FRAGMENT, fs, binary_bo, + builder->shader_offsets[MESA_SHADER_FRAGMENT]); +} + +static void +tu6_emit_vertex_input(struct tu_cs *cs, + const struct ir3_shader_variant *vs, + const VkPipelineVertexInputStateCreateInfo *vi_info, + uint8_t bindings[MAX_VERTEX_ATTRIBS], + uint16_t strides[MAX_VERTEX_ATTRIBS], + uint16_t offsets[MAX_VERTEX_ATTRIBS], + uint32_t *count) +{ + uint32_t vfd_decode_idx = 0; + + /* why do we go beyond inputs_count? */ + assert(vs->inputs_count + 1 <= MAX_VERTEX_ATTRIBS); + for (uint32_t i = 0; i <= vs->inputs_count; i++) { + if (vs->inputs[i].sysval || !vs->inputs[i].compmask) + continue; + + const VkVertexInputAttributeDescription *vi_attr = + tu_find_vertex_input_attribute(vi_info, vs->inputs[i].slot); + const VkVertexInputBindingDescription *vi_binding = + tu_find_vertex_input_binding(vi_info, vi_attr); + assert(vi_attr && vi_binding); + + const struct tu_native_format *format = + tu6_get_native_format(vi_attr->format); + assert(format && format->vtx >= 0); + + uint32_t vfd_decode = A6XX_VFD_DECODE_INSTR_IDX(vfd_decode_idx) | + A6XX_VFD_DECODE_INSTR_FORMAT(format->vtx) | + A6XX_VFD_DECODE_INSTR_SWAP(format->swap) | + A6XX_VFD_DECODE_INSTR_UNK30; + if (vi_binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) + vfd_decode |= A6XX_VFD_DECODE_INSTR_INSTANCED; + if (!vk_format_is_int(vi_attr->format)) + vfd_decode |= A6XX_VFD_DECODE_INSTR_FLOAT; + + const uint32_t vfd_decode_step_rate = 1; + + const uint32_t vfd_dest_cntl = + A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vs->inputs[i].compmask) | + A6XX_VFD_DEST_CNTL_INSTR_REGID(vs->inputs[i].regid); + + tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DECODE(vfd_decode_idx), 2); + tu_cs_emit(cs, vfd_decode); + tu_cs_emit(cs, vfd_decode_step_rate); + + tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DEST_CNTL(vfd_decode_idx), 1); + tu_cs_emit(cs, vfd_dest_cntl); + + bindings[vfd_decode_idx] = vi_binding->binding; + strides[vfd_decode_idx] = vi_binding->stride; + offsets[vfd_decode_idx] = vi_attr->offset; + + vfd_decode_idx++; + } + + tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_0, 1); + tu_cs_emit( + cs, A6XX_VFD_CONTROL_0_VTXCNT(vfd_decode_idx) | (vfd_decode_idx << 8)); + + *count = vfd_decode_idx; +} + +static uint32_t +tu6_guardband_adj(uint32_t v) +{ + if (v > 256) + return (uint32_t)(511.0 - 65.0 * (log2(v) - 8.0)); + else + return 511; +} + +void +tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport) +{ + float offsets[3]; + float scales[3]; + scales[0] = viewport->width / 2.0f; + scales[1] = 
viewport->height / 2.0f; + scales[2] = viewport->maxDepth - viewport->minDepth; + offsets[0] = viewport->x + scales[0]; + offsets[1] = viewport->y + scales[1]; + offsets[2] = viewport->minDepth; + + VkOffset2D min; + VkOffset2D max; + min.x = (int32_t) viewport->x; + max.x = (int32_t) ceilf(viewport->x + viewport->width); + if (viewport->height >= 0.0f) { + min.y = (int32_t) viewport->y; + max.y = (int32_t) ceilf(viewport->y + viewport->height); + } else { + min.y = (int32_t)(viewport->y + viewport->height); + max.y = (int32_t) ceilf(viewport->y); + } + /* the spec allows viewport->height to be 0.0f */ + if (min.y == max.y) + max.y++; + assert(min.x >= 0 && min.x < max.x); + assert(min.y >= 0 && min.y < max.y); + + VkExtent2D guardband_adj; + guardband_adj.width = tu6_guardband_adj(max.x - min.x); + guardband_adj.height = tu6_guardband_adj(max.y - min.y); + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_VPORT_XOFFSET_0, 6); + tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XOFFSET_0(offsets[0])); + tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XSCALE_0(scales[0])); + tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YOFFSET_0(offsets[1])); + tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YSCALE_0(scales[1])); + tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZOFFSET_0(offsets[2])); + tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZSCALE_0(scales[2])); + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2); + tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(min.x) | + A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(min.y)); + tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(max.x - 1) | + A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(max.y - 1)); + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1); + tu_cs_emit(cs, + A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband_adj.width) | + A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband_adj.height)); +} + +void +tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor) +{ + const VkOffset2D min = scissor->offset; + const VkOffset2D max = { + scissor->offset.x + scissor->extent.width, + scissor->offset.y + scissor->extent.height, + }; + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2); + tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(min.x) | + A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(min.y)); + tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(max.x - 1) | + A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(max.y - 1)); +} + +static void +tu6_emit_gras_unknowns(struct tu_cs *cs) +{ + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8000, 1); + tu_cs_emit(cs, 0x80); + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8001, 1); + tu_cs_emit(cs, 0x0); + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8004, 1); + tu_cs_emit(cs, 0x0); +} + +static void +tu6_emit_point_size(struct tu_cs *cs) +{ + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POINT_MINMAX, 2); + tu_cs_emit(cs, A6XX_GRAS_SU_POINT_MINMAX_MIN(1.0f / 16.0f) | + A6XX_GRAS_SU_POINT_MINMAX_MAX(4092.0f)); + tu_cs_emit(cs, A6XX_GRAS_SU_POINT_SIZE(1.0f)); +} + +static uint32_t +tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info, + VkSampleCountFlagBits samples) +{ + uint32_t gras_su_cntl = 0; + + if (rast_info->cullMode & VK_CULL_MODE_FRONT_BIT) + gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT; + if (rast_info->cullMode & VK_CULL_MODE_BACK_BIT) + gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK; + + if (rast_info->frontFace == VK_FRONT_FACE_CLOCKWISE) + gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW; + + /* don't set A6XX_GRAS_SU_CNTL_LINEHALFWIDTH */ + + if (rast_info->depthBiasEnable) + gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET; + + if (samples > VK_SAMPLE_COUNT_1_BIT) + gras_su_cntl |= 
A6XX_GRAS_SU_CNTL_MSAA_ENABLE; + + return gras_su_cntl; +} + +void +tu6_emit_gras_su_cntl(struct tu_cs *cs, + uint32_t gras_su_cntl, + float line_width) +{ + assert((gras_su_cntl & A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK) == 0); + gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(line_width / 2.0f); + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_CNTL, 1); + tu_cs_emit(cs, gras_su_cntl); +} + +void +tu6_emit_depth_bias(struct tu_cs *cs, + float constant_factor, + float clamp, + float slope_factor) +{ + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE, 3); + tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_SCALE(slope_factor)); + tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(constant_factor)); + tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp)); +} + +static void +tu6_emit_alpha_control_disable(struct tu_cs *cs) +{ + tu_cs_emit_pkt4(cs, REG_A6XX_RB_ALPHA_CONTROL, 1); + tu_cs_emit(cs, 0); +} + +static void +tu6_emit_depth_control(struct tu_cs *cs, + const VkPipelineDepthStencilStateCreateInfo *ds_info) +{ + assert(!ds_info->depthBoundsTestEnable); + + uint32_t rb_depth_cntl = 0; + if (ds_info->depthTestEnable) { + rb_depth_cntl |= + A6XX_RB_DEPTH_CNTL_Z_ENABLE | + A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) | + A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; + + if (ds_info->depthWriteEnable) + rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; + } + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_CNTL, 1); + tu_cs_emit(cs, rb_depth_cntl); +} + +static void +tu6_emit_stencil_control(struct tu_cs *cs, + const VkPipelineDepthStencilStateCreateInfo *ds_info) +{ + uint32_t rb_stencil_control = 0; + if (ds_info->stencilTestEnable) { + const VkStencilOpState *front = &ds_info->front; + const VkStencilOpState *back = &ds_info->back; + rb_stencil_control |= + A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | + A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | + A6XX_RB_STENCIL_CONTROL_STENCIL_READ | + A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(front->compareOp)) | + A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(front->failOp)) | + A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(front->passOp)) | + A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(front->depthFailOp)) | + A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(back->compareOp)) | + A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(back->failOp)) | + A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(back->passOp)) | + A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(back->depthFailOp)); + } + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_CONTROL, 1); + tu_cs_emit(cs, rb_stencil_control); +} + +void +tu6_emit_stencil_compare_mask(struct tu_cs *cs, uint32_t front, uint32_t back) +{ + tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILMASK, 1); + tu_cs_emit( + cs, A6XX_RB_STENCILMASK_MASK(front) | A6XX_RB_STENCILMASK_BFMASK(back)); +} + +void +tu6_emit_stencil_write_mask(struct tu_cs *cs, uint32_t front, uint32_t back) +{ + tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILWRMASK, 1); + tu_cs_emit(cs, A6XX_RB_STENCILWRMASK_WRMASK(front) | + A6XX_RB_STENCILWRMASK_BFWRMASK(back)); +} + +void +tu6_emit_stencil_reference(struct tu_cs *cs, uint32_t front, uint32_t back) +{ + tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILREF, 1); + tu_cs_emit(cs, + A6XX_RB_STENCILREF_REF(front) | A6XX_RB_STENCILREF_BFREF(back)); +} + +static uint32_t +tu6_rb_mrt_blend_control(const VkPipelineColorBlendAttachmentState *att, + bool has_alpha) +{ + const enum a3xx_rb_blend_opcode color_op = tu6_blend_op(att->colorBlendOp); + const enum adreno_rb_blend_factor src_color_factor = tu6_blend_factor( + has_alpha ? 
att->srcColorBlendFactor + : tu_blend_factor_no_dst_alpha(att->srcColorBlendFactor)); + const enum adreno_rb_blend_factor dst_color_factor = tu6_blend_factor( + has_alpha ? att->dstColorBlendFactor + : tu_blend_factor_no_dst_alpha(att->dstColorBlendFactor)); + const enum a3xx_rb_blend_opcode alpha_op = tu6_blend_op(att->alphaBlendOp); + const enum adreno_rb_blend_factor src_alpha_factor = + tu6_blend_factor(att->srcAlphaBlendFactor); + const enum adreno_rb_blend_factor dst_alpha_factor = + tu6_blend_factor(att->dstAlphaBlendFactor); + + return A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(src_color_factor) | + A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(color_op) | + A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(dst_color_factor) | + A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(src_alpha_factor) | + A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(alpha_op) | + A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(dst_alpha_factor); +} + +static uint32_t +tu6_rb_mrt_control(const VkPipelineColorBlendAttachmentState *att, + uint32_t rb_mrt_control_rop, + bool is_int, + bool has_alpha) +{ + uint32_t rb_mrt_control = + A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(att->colorWriteMask); + + /* ignore blending and logic op for integer attachments */ + if (is_int) { + rb_mrt_control |= A6XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY); + return rb_mrt_control; + } + + rb_mrt_control |= rb_mrt_control_rop; + + if (att->blendEnable) { + rb_mrt_control |= A6XX_RB_MRT_CONTROL_BLEND; + + if (has_alpha) + rb_mrt_control |= A6XX_RB_MRT_CONTROL_BLEND2; + } + + return rb_mrt_control; +} + +static void +tu6_emit_rb_mrt_controls(struct tu_cs *cs, + const VkPipelineColorBlendStateCreateInfo *blend_info, + const VkFormat attachment_formats[MAX_RTS], + uint32_t *blend_enable_mask) +{ + *blend_enable_mask = 0; + + bool rop_reads_dst = false; + uint32_t rb_mrt_control_rop = 0; + if (blend_info->logicOpEnable) { + rop_reads_dst = tu_logic_op_reads_dst(blend_info->logicOp); + rb_mrt_control_rop = + A6XX_RB_MRT_CONTROL_ROP_ENABLE | + A6XX_RB_MRT_CONTROL_ROP_CODE(tu6_rop(blend_info->logicOp)); + } + + for (uint32_t i = 0; i < blend_info->attachmentCount; i++) { + const VkPipelineColorBlendAttachmentState *att = + &blend_info->pAttachments[i]; + const VkFormat format = attachment_formats[i]; + + uint32_t rb_mrt_control = 0; + uint32_t rb_mrt_blend_control = 0; + if (format != VK_FORMAT_UNDEFINED) { + const bool is_int = vk_format_is_int(format); + const bool has_alpha = vk_format_has_alpha(format); + + rb_mrt_control = + tu6_rb_mrt_control(att, rb_mrt_control_rop, is_int, has_alpha); + rb_mrt_blend_control = tu6_rb_mrt_blend_control(att, has_alpha); + + if (att->blendEnable || rop_reads_dst) + *blend_enable_mask |= 1 << i; + } + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_CONTROL(i), 2); + tu_cs_emit(cs, rb_mrt_control); + tu_cs_emit(cs, rb_mrt_blend_control); + } + + for (uint32_t i = blend_info->attachmentCount; i < MAX_RTS; i++) { + tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_CONTROL(i), 2); + tu_cs_emit(cs, 0); + tu_cs_emit(cs, 0); + } +} + +static void +tu6_emit_blend_control(struct tu_cs *cs, + uint32_t blend_enable_mask, + const VkPipelineMultisampleStateCreateInfo *msaa_info) +{ + assert(!msaa_info->sampleShadingEnable); + assert(!msaa_info->alphaToOneEnable); + + uint32_t sp_blend_cntl = A6XX_SP_BLEND_CNTL_UNK8; + if (blend_enable_mask) + sp_blend_cntl |= A6XX_SP_BLEND_CNTL_ENABLED; + if (msaa_info->alphaToCoverageEnable) + sp_blend_cntl |= A6XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE; + + const uint32_t sample_mask = + msaa_info->pSampleMask ? 
*msaa_info->pSampleMask + : ((1 << msaa_info->rasterizationSamples) - 1); + + /* set A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND only when enabled? */ + uint32_t rb_blend_cntl = + A6XX_RB_BLEND_CNTL_ENABLE_BLEND(blend_enable_mask) | + A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND | + A6XX_RB_BLEND_CNTL_SAMPLE_MASK(sample_mask); + if (msaa_info->alphaToCoverageEnable) + rb_blend_cntl |= A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE; + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_BLEND_CNTL, 1); + tu_cs_emit(cs, sp_blend_cntl); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLEND_CNTL, 1); + tu_cs_emit(cs, rb_blend_cntl); +} + +void +tu6_emit_blend_constants(struct tu_cs *cs, const float constants[4]) +{ + tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLEND_RED_F32, 4); + tu_cs_emit_array(cs, (const uint32_t *) constants, 4); +} + +static VkResult +tu_pipeline_builder_create_pipeline(struct tu_pipeline_builder *builder, + struct tu_pipeline **out_pipeline) +{ + struct tu_device *dev = builder->device; + + struct tu_pipeline *pipeline = + vk_zalloc2(&dev->alloc, builder->alloc, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!pipeline) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + tu_cs_init(&pipeline->cs, TU_CS_MODE_SUB_STREAM, 2048); + + /* reserve the space now such that tu_cs_begin_sub_stream never fails */ + VkResult result = tu_cs_reserve_space(dev, &pipeline->cs, 2048); + if (result != VK_SUCCESS) { + vk_free2(&dev->alloc, builder->alloc, pipeline); + return result; + } + + *out_pipeline = pipeline; + + return VK_SUCCESS; +} + +static VkResult +tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder) +{ + const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = { + NULL + }; + for (uint32_t i = 0; i < builder->create_info->stageCount; i++) { + gl_shader_stage stage = + tu_shader_stage(builder->create_info->pStages[i].stage); + stage_infos[stage] = &builder->create_info->pStages[i]; + } + + struct tu_shader_compile_options options; + tu_shader_compile_options_init(&options, builder->create_info); + + /* compile shaders in reverse order */ + struct tu_shader *next_stage_shader = NULL; + for (gl_shader_stage stage = MESA_SHADER_STAGES - 1; + stage > MESA_SHADER_NONE; stage--) { + const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage]; + if (!stage_info) + continue; + + struct tu_shader *shader = + tu_shader_create(builder->device, stage, stage_info, builder->alloc); + if (!shader) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + VkResult result = + tu_shader_compile(builder->device, shader, next_stage_shader, + &options, builder->alloc); + if (result != VK_SUCCESS) + return result; + + builder->shaders[stage] = shader; + builder->shader_offsets[stage] = builder->shader_total_size; + builder->shader_total_size += + sizeof(uint32_t) * shader->variants[0].info.sizedwords; + + next_stage_shader = shader; + } + + if (builder->shaders[MESA_SHADER_VERTEX]->has_binning_pass) { + const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX]; + builder->binning_vs_offset = builder->shader_total_size; + builder->shader_total_size += + sizeof(uint32_t) * vs->variants[1].info.sizedwords; + } + + return VK_SUCCESS; +} + +static VkResult +tu_pipeline_builder_upload_shaders(struct tu_pipeline_builder *builder, + struct tu_pipeline *pipeline) +{ + struct tu_bo *bo = &pipeline->program.binary_bo; + + VkResult result = + tu_bo_init_new(builder->device, bo, builder->shader_total_size); + if (result != VK_SUCCESS) + return result; + + result = tu_bo_map(builder->device, bo); + if (result != VK_SUCCESS) + return 
result; + + for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { + const struct tu_shader *shader = builder->shaders[i]; + if (!shader) + continue; + + memcpy(bo->map + builder->shader_offsets[i], shader->binary, + sizeof(uint32_t) * shader->variants[0].info.sizedwords); + } + + if (builder->shaders[MESA_SHADER_VERTEX]->has_binning_pass) { + const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX]; + memcpy(bo->map + builder->binning_vs_offset, vs->binning_binary, + sizeof(uint32_t) * vs->variants[1].info.sizedwords); + } + + return VK_SUCCESS; +} + +static void +tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder, + struct tu_pipeline *pipeline) +{ + const VkPipelineDynamicStateCreateInfo *dynamic_info = + builder->create_info->pDynamicState; + + if (!dynamic_info) + return; + + for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) { + pipeline->dynamic_state.mask |= + tu_dynamic_state_bit(dynamic_info->pDynamicStates[i]); + } +} + +static void +tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, + struct tu_pipeline *pipeline) +{ + struct tu_cs prog_cs; + tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 512, &prog_cs); + tu6_emit_program(&prog_cs, builder, &pipeline->program.binary_bo, false); + pipeline->program.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &prog_cs); + + tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 512, &prog_cs); + tu6_emit_program(&prog_cs, builder, &pipeline->program.binary_bo, true); + pipeline->program.binning_state_ib = + tu_cs_end_sub_stream(&pipeline->cs, &prog_cs); +} + +static void +tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder, + struct tu_pipeline *pipeline) +{ + const VkPipelineVertexInputStateCreateInfo *vi_info = + builder->create_info->pVertexInputState; + const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX]; + + struct tu_cs vi_cs; + tu_cs_begin_sub_stream(builder->device, &pipeline->cs, + MAX_VERTEX_ATTRIBS * 5 + 2, &vi_cs); + tu6_emit_vertex_input(&vi_cs, &vs->variants[0], vi_info, + pipeline->vi.bindings, pipeline->vi.strides, + pipeline->vi.offsets, &pipeline->vi.count); + pipeline->vi.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vi_cs); + + if (vs->has_binning_pass) { + tu_cs_begin_sub_stream(builder->device, &pipeline->cs, + MAX_VERTEX_ATTRIBS * 5 + 2, &vi_cs); + tu6_emit_vertex_input( + &vi_cs, &vs->variants[1], vi_info, pipeline->vi.binning_bindings, + pipeline->vi.binning_strides, pipeline->vi.binning_offsets, + &pipeline->vi.binning_count); + pipeline->vi.binning_state_ib = + tu_cs_end_sub_stream(&pipeline->cs, &vi_cs); + } +} + +static void +tu_pipeline_builder_parse_input_assembly(struct tu_pipeline_builder *builder, + struct tu_pipeline *pipeline) +{ + const VkPipelineInputAssemblyStateCreateInfo *ia_info = + builder->create_info->pInputAssemblyState; + + pipeline->ia.primtype = tu6_primtype(ia_info->topology); + pipeline->ia.primitive_restart = ia_info->primitiveRestartEnable; +} + +static void +tu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder, + struct tu_pipeline *pipeline) +{ + /* The spec says: + * + * pViewportState is a pointer to an instance of the + * VkPipelineViewportStateCreateInfo structure, and is ignored if the + * pipeline has rasterization disabled." + * + * We leave the relevant registers stale in that case. 
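+ * Leaving them stale is harmless: with rasterization discarded nothing + * reaches the rasterizer, so the viewport and scissor registers are never + * consumed before some later pipeline bind rewrites them.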
+ */ + if (builder->rasterizer_discard) + return; + + const VkPipelineViewportStateCreateInfo *vp_info = + builder->create_info->pViewportState; + + struct tu_cs vp_cs; + tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 15, &vp_cs); + + if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_VIEWPORT)) { + assert(vp_info->viewportCount == 1); + tu6_emit_viewport(&vp_cs, vp_info->pViewports); + } + + if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SCISSOR)) { + assert(vp_info->scissorCount == 1); + tu6_emit_scissor(&vp_cs, vp_info->pScissors); + } + + pipeline->vp.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vp_cs); +} + +static void +tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, + struct tu_pipeline *pipeline) +{ + const VkPipelineRasterizationStateCreateInfo *rast_info = + builder->create_info->pRasterizationState; + + assert(!rast_info->depthClampEnable); + assert(rast_info->polygonMode == VK_POLYGON_MODE_FILL); + + struct tu_cs rast_cs; + tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 20, &rast_cs); + + /* move to hw ctx init? */ + tu6_emit_gras_unknowns(&rast_cs); + tu6_emit_point_size(&rast_cs); + + const uint32_t gras_su_cntl = + tu6_gras_su_cntl(rast_info, builder->samples); + + if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH)) + tu6_emit_gras_su_cntl(&rast_cs, gras_su_cntl, rast_info->lineWidth); + + if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_DEPTH_BIAS)) { + tu6_emit_depth_bias(&rast_cs, rast_info->depthBiasConstantFactor, + rast_info->depthBiasClamp, + rast_info->depthBiasSlopeFactor); + } + + pipeline->rast.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &rast_cs); + + pipeline->rast.gras_su_cntl = gras_su_cntl; +} + +static void +tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder, + struct tu_pipeline *pipeline) +{ + /* The spec says: + * + * pDepthStencilState is a pointer to an instance of the + * VkPipelineDepthStencilStateCreateInfo structure, and is ignored if + * the pipeline has rasterization disabled or if the subpass of the + * render pass the pipeline is created against does not use a + * depth/stencil attachment. + * + * We disable both depth and stencil tests in those cases. + */ + static const VkPipelineDepthStencilStateCreateInfo dummy_ds_info; + const VkPipelineDepthStencilStateCreateInfo *ds_info = + builder->use_depth_stencil_attachment + ? builder->create_info->pDepthStencilState + : &dummy_ds_info; + + struct tu_cs ds_cs; + tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 12, &ds_cs); + + /* move to hw ctx init? 
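+ * RB_ALPHA_CONTROL is written as constant zero below, so that emit could + * plausibly move to one-time hardware context init.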
*/ + tu6_emit_alpha_control_disable(&ds_cs); + + tu6_emit_depth_control(&ds_cs, ds_info); + tu6_emit_stencil_control(&ds_cs, ds_info); + + if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) { + tu6_emit_stencil_compare_mask(&ds_cs, ds_info->front.compareMask, + ds_info->back.compareMask); + } + if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) { + tu6_emit_stencil_write_mask(&ds_cs, ds_info->front.writeMask, + ds_info->back.writeMask); + } + if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) { + tu6_emit_stencil_reference(&ds_cs, ds_info->front.reference, + ds_info->back.reference); + } + + pipeline->ds.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &ds_cs); +} + +static void +tu_pipeline_builder_parse_multisample_and_color_blend( + struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) +{ + /* The spec says: + * + * pMultisampleState is a pointer to an instance of the + * VkPipelineMultisampleStateCreateInfo, and is ignored if the pipeline + * has rasterization disabled. + * + * Also, + * + * pColorBlendState is a pointer to an instance of the + * VkPipelineColorBlendStateCreateInfo structure, and is ignored if the + * pipeline has rasterization disabled or if the subpass of the render + * pass the pipeline is created against does not use any color + * attachments. + * + * We leave the relevant registers stale when rasterization is disabled. + */ + if (builder->rasterizer_discard) + return; + + static const VkPipelineColorBlendStateCreateInfo dummy_blend_info; + const VkPipelineMultisampleStateCreateInfo *msaa_info = + builder->create_info->pMultisampleState; + const VkPipelineColorBlendStateCreateInfo *blend_info = + builder->use_color_attachments ? builder->create_info->pColorBlendState + : &dummy_blend_info; + + struct tu_cs blend_cs; + tu_cs_begin_sub_stream(builder->device, &pipeline->cs, MAX_RTS * 3 + 9, + &blend_cs); + + uint32_t blend_enable_mask; + tu6_emit_rb_mrt_controls(&blend_cs, blend_info, + builder->color_attachment_formats, + &blend_enable_mask); + + if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_BLEND_CONSTANTS)) + tu6_emit_blend_constants(&blend_cs, blend_info->blendConstants); + + tu6_emit_blend_control(&blend_cs, blend_enable_mask, msaa_info); + + pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &blend_cs); +} + +static void +tu_pipeline_finish(struct tu_pipeline *pipeline, + struct tu_device *dev, + const VkAllocationCallbacks *alloc) +{ + tu_cs_finish(dev, &pipeline->cs); + + if (pipeline->program.binary_bo.gem_handle) + tu_bo_finish(dev, &pipeline->program.binary_bo); +} + +static VkResult +tu_pipeline_builder_build(struct tu_pipeline_builder *builder, + struct tu_pipeline **pipeline) +{ + VkResult result = tu_pipeline_builder_create_pipeline(builder, pipeline); + if (result != VK_SUCCESS) + return result; + + /* compile and upload shaders */ + result = tu_pipeline_builder_compile_shaders(builder); + if (result == VK_SUCCESS) + result = tu_pipeline_builder_upload_shaders(builder, *pipeline); + if (result != VK_SUCCESS) { + tu_pipeline_finish(*pipeline, builder->device, builder->alloc); + vk_free2(&builder->device->alloc, builder->alloc, *pipeline); + *pipeline = VK_NULL_HANDLE; + + return result; + } + + tu_pipeline_builder_parse_dynamic(builder, *pipeline); + tu_pipeline_builder_parse_shader_stages(builder, *pipeline); + tu_pipeline_builder_parse_vertex_input(builder, *pipeline); + tu_pipeline_builder_parse_input_assembly(builder, *pipeline); + 
tu_pipeline_builder_parse_viewport(builder, *pipeline); + tu_pipeline_builder_parse_rasterization(builder, *pipeline); + tu_pipeline_builder_parse_depth_stencil(builder, *pipeline); + tu_pipeline_builder_parse_multisample_and_color_blend(builder, *pipeline); + + /* we should have reserved enough space upfront such that the CS never + * grows + */ + assert((*pipeline)->cs.bo_count == 1); + + return VK_SUCCESS; +} + +static void +tu_pipeline_builder_finish(struct tu_pipeline_builder *builder) +{ + for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { + if (!builder->shaders[i]) + continue; + tu_shader_destroy(builder->device, builder->shaders[i], builder->alloc); + } +} + +static void +tu_pipeline_builder_init_graphics( + struct tu_pipeline_builder *builder, + struct tu_device *dev, + struct tu_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *create_info, + const VkAllocationCallbacks *alloc) +{ + *builder = (struct tu_pipeline_builder) { + .device = dev, + .cache = cache, + .create_info = create_info, + .alloc = alloc, + }; + + builder->rasterizer_discard = + create_info->pRasterizationState->rasterizerDiscardEnable; + + if (builder->rasterizer_discard) { + builder->samples = VK_SAMPLE_COUNT_1_BIT; + } else { + builder->samples = create_info->pMultisampleState->rasterizationSamples; + + const struct tu_render_pass *pass = + tu_render_pass_from_handle(create_info->renderPass); + const struct tu_subpass *subpass = + &pass->subpasses[create_info->subpass]; + + builder->use_depth_stencil_attachment = + subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED; + + assert(subpass->color_count == + create_info->pColorBlendState->attachmentCount); + builder->color_attachment_count = subpass->color_count; + for (uint32_t i = 0; i < subpass->color_count; i++) { + const uint32_t a = subpass->color_attachments[i].attachment; + if (a == VK_ATTACHMENT_UNUSED) + continue; + + builder->color_attachment_formats[i] = pass->attachments[a].format; + builder->use_color_attachments = true; + } + } +} + +VkResult +tu_CreateGraphicsPipelines(VkDevice device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkGraphicsPipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) +{ + TU_FROM_HANDLE(tu_device, dev, device); + TU_FROM_HANDLE(tu_pipeline_cache, cache, pipelineCache); + + for (uint32_t i = 0; i < count; i++) { + struct tu_pipeline_builder builder; + tu_pipeline_builder_init_graphics(&builder, dev, cache, + &pCreateInfos[i], pAllocator); + + struct tu_pipeline *pipeline; + VkResult result = tu_pipeline_builder_build(&builder, &pipeline); + tu_pipeline_builder_finish(&builder); + + if (result != VK_SUCCESS) { + for (uint32_t j = 0; j < i; j++) { + tu_DestroyPipeline(device, pPipelines[j], pAllocator); + pPipelines[j] = VK_NULL_HANDLE; + } + + return result; + } + + pPipelines[i] = tu_pipeline_to_handle(pipeline); + } + + return VK_SUCCESS; +} + +static VkResult +tu_compute_pipeline_create(VkDevice _device, + VkPipelineCache _cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipeline) +{ + return VK_SUCCESS; +} + +VkResult +tu_CreateComputePipelines(VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkComputePipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + VkResult r; + r = tu_compute_pipeline_create(_device, 
pipelineCache, &pCreateInfos[i], + pAllocator, &pPipelines[i]); + if (r != VK_SUCCESS) { + result = r; + pPipelines[i] = VK_NULL_HANDLE; + } + } + + return result; +} + +void +tu_DestroyPipeline(VkDevice _device, + VkPipeline _pipeline, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, dev, _device); + TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline); + + if (!_pipeline) + return; + + tu_pipeline_finish(pipeline, dev, pAllocator); + vk_free2(&dev->alloc, pAllocator, pipeline); +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_pipeline_cache.c b/lib/mesa/src/freedreno/vulkan/tu_pipeline_cache.c new file mode 100644 index 000000000..b8b2ceda2 --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_pipeline_cache.c @@ -0,0 +1,422 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "tu_private.h" + +#include "util/debug.h" +#include "util/disk_cache.h" +#include "util/mesa-sha1.h" +#include "util/u_atomic.h" + +struct cache_entry_variant_info +{ +}; + +struct cache_entry +{ + union { + unsigned char sha1[20]; + uint32_t sha1_dw[5]; + }; + uint32_t code_sizes[MESA_SHADER_STAGES]; + struct tu_shader_variant *variants[MESA_SHADER_STAGES]; + char code[0]; +}; + +void +tu_pipeline_cache_init(struct tu_pipeline_cache *cache, + struct tu_device *device) +{ + cache->device = device; + pthread_mutex_init(&cache->mutex, NULL); + + cache->modified = false; + cache->kernel_count = 0; + cache->total_size = 0; + cache->table_size = 1024; + const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]); + cache->hash_table = malloc(byte_size); + + /* We don't consider allocation failure fatal, we just start with a 0-sized + * cache. Disable caching when we want to keep shader debug info, since + * we don't get the debug info on cached shaders. 
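+ * With a zero-sized table, tu_pipeline_cache_search_unlocked always + * misses and tu_pipeline_cache_add_entry never finds room, so the cache + * degrades to a harmless no-op.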
*/ + if (cache->hash_table == NULL) + cache->table_size = 0; + else + memset(cache->hash_table, 0, byte_size); +} + +void +tu_pipeline_cache_finish(struct tu_pipeline_cache *cache) +{ + for (unsigned i = 0; i < cache->table_size; ++i) + if (cache->hash_table[i]) { + vk_free(&cache->alloc, cache->hash_table[i]); + } + pthread_mutex_destroy(&cache->mutex); + free(cache->hash_table); +} + +static uint32_t +entry_size(struct cache_entry *entry) +{ + size_t ret = sizeof(*entry); + for (int i = 0; i < MESA_SHADER_STAGES; ++i) + if (entry->code_sizes[i]) + ret += + sizeof(struct cache_entry_variant_info) + entry->code_sizes[i]; + return ret; +} + +void +tu_hash_shaders(unsigned char *hash, + const VkPipelineShaderStageCreateInfo **stages, + const struct tu_pipeline_layout *layout, + const struct tu_pipeline_key *key, + uint32_t flags) +{ + struct mesa_sha1 ctx; + + _mesa_sha1_init(&ctx); + if (key) + _mesa_sha1_update(&ctx, key, sizeof(*key)); + if (layout) + _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); + + for (int i = 0; i < MESA_SHADER_STAGES; ++i) { + if (stages[i]) { + TU_FROM_HANDLE(tu_shader_module, module, stages[i]->module); + const VkSpecializationInfo *spec_info = + stages[i]->pSpecializationInfo; + + _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1)); + _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName)); + if (spec_info) { + _mesa_sha1_update( + &ctx, spec_info->pMapEntries, + spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]); + _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize); + } + } + } + _mesa_sha1_update(&ctx, &flags, 4); + _mesa_sha1_final(&ctx, hash); +} + +static struct cache_entry * +tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache, + const unsigned char *sha1) +{ + const uint32_t mask = cache->table_size - 1; + const uint32_t start = (*(uint32_t *) sha1); + + if (cache->table_size == 0) + return NULL; + + for (uint32_t i = 0; i < cache->table_size; i++) { + const uint32_t index = (start + i) & mask; + struct cache_entry *entry = cache->hash_table[index]; + + if (!entry) + return NULL; + + if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { + return entry; + } + } + + unreachable("hash table should never be full"); +} + +static struct cache_entry * +tu_pipeline_cache_search(struct tu_pipeline_cache *cache, + const unsigned char *sha1) +{ + struct cache_entry *entry; + + pthread_mutex_lock(&cache->mutex); + + entry = tu_pipeline_cache_search_unlocked(cache, sha1); + + pthread_mutex_unlock(&cache->mutex); + + return entry; +} + +static void +tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache, + struct cache_entry *entry) +{ + const uint32_t mask = cache->table_size - 1; + const uint32_t start = entry->sha1_dw[0]; + + /* We'll always be able to insert when we get here. 
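+ * tu_pipeline_cache_add_entry grows the table before it becomes more + * than half full, so the linear probe below must reach a free slot.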
*/ + assert(cache->kernel_count < cache->table_size / 2); + + for (uint32_t i = 0; i < cache->table_size; i++) { + const uint32_t index = (start + i) & mask; + if (!cache->hash_table[index]) { + cache->hash_table[index] = entry; + break; + } + } + + cache->total_size += entry_size(entry); + cache->kernel_count++; +} + +static VkResult +tu_pipeline_cache_grow(struct tu_pipeline_cache *cache) +{ + const uint32_t table_size = cache->table_size * 2; + const uint32_t old_table_size = cache->table_size; + const size_t byte_size = table_size * sizeof(cache->hash_table[0]); + struct cache_entry **table; + struct cache_entry **old_table = cache->hash_table; + + table = malloc(byte_size); + if (table == NULL) + return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + cache->hash_table = table; + cache->table_size = table_size; + cache->kernel_count = 0; + cache->total_size = 0; + + memset(cache->hash_table, 0, byte_size); + for (uint32_t i = 0; i < old_table_size; i++) { + struct cache_entry *entry = old_table[i]; + if (!entry) + continue; + + tu_pipeline_cache_set_entry(cache, entry); + } + + free(old_table); + + return VK_SUCCESS; +} + +static void +tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache, + struct cache_entry *entry) +{ + if (cache->kernel_count == cache->table_size / 2) + tu_pipeline_cache_grow(cache); + + /* Failing to grow that hash table isn't fatal, but may mean we don't + * have enough space to add this new kernel. Only add it if there's room. + */ + if (cache->kernel_count < cache->table_size / 2) + tu_pipeline_cache_set_entry(cache, entry); +} + +struct cache_header +{ + uint32_t header_size; + uint32_t header_version; + uint32_t vendor_id; + uint32_t device_id; + uint8_t uuid[VK_UUID_SIZE]; +}; + +void +tu_pipeline_cache_load(struct tu_pipeline_cache *cache, + const void *data, + size_t size) +{ + struct tu_device *device = cache->device; + struct cache_header header; + + if (size < sizeof(header)) + return; + memcpy(&header, data, sizeof(header)); + if (header.header_size < sizeof(header)) + return; + if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) + return; + if (header.vendor_id != 0 /* TODO */) + return; + if (header.device_id != 0 /* TODO */) + return; + if (memcmp(header.uuid, device->physical_device->cache_uuid, + VK_UUID_SIZE) != 0) + return; + + char *end = (void *) data + size; + char *p = (void *) data + header.header_size; + + while (end - p >= sizeof(struct cache_entry)) { + struct cache_entry *entry = (struct cache_entry *) p; + struct cache_entry *dest_entry; + size_t size = entry_size(entry); + if (end - p < size) + break; + + dest_entry = + vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE); + if (dest_entry) { + memcpy(dest_entry, entry, size); + for (int i = 0; i < MESA_SHADER_STAGES; ++i) + dest_entry->variants[i] = NULL; + tu_pipeline_cache_add_entry(cache, dest_entry); + } + p += size; + } +} + +VkResult +tu_CreatePipelineCache(VkDevice _device, + const VkPipelineCacheCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineCache *pPipelineCache) +{ + TU_FROM_HANDLE(tu_device, device, _device); + struct tu_pipeline_cache *cache; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + cache = vk_alloc2(&device->alloc, pAllocator, sizeof(*cache), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cache == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + if (pAllocator) + cache->alloc = 
*pAllocator; + else + cache->alloc = device->alloc; + + tu_pipeline_cache_init(cache, device); + + if (pCreateInfo->initialDataSize > 0) { + tu_pipeline_cache_load(cache, pCreateInfo->pInitialData, + pCreateInfo->initialDataSize); + } + + *pPipelineCache = tu_pipeline_cache_to_handle(cache); + + return VK_SUCCESS; +} + +void +tu_DestroyPipelineCache(VkDevice _device, + VkPipelineCache _cache, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache); + + if (!cache) + return; + tu_pipeline_cache_finish(cache); + + vk_free2(&device->alloc, pAllocator, cache); +} + +VkResult +tu_GetPipelineCacheData(VkDevice _device, + VkPipelineCache _cache, + size_t *pDataSize, + void *pData) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache); + struct cache_header *header; + VkResult result = VK_SUCCESS; + + pthread_mutex_lock(&cache->mutex); + + const size_t size = sizeof(*header) + cache->total_size; + if (pData == NULL) { + pthread_mutex_unlock(&cache->mutex); + *pDataSize = size; + return VK_SUCCESS; + } + if (*pDataSize < sizeof(*header)) { + pthread_mutex_unlock(&cache->mutex); + *pDataSize = 0; + return VK_INCOMPLETE; + } + void *p = pData, *end = pData + *pDataSize; + header = p; + header->header_size = sizeof(*header); + header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE; + header->vendor_id = 0 /* TODO */; + header->device_id = 0 /* TODO */; + memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE); + p += header->header_size; + + struct cache_entry *entry; + for (uint32_t i = 0; i < cache->table_size; i++) { + if (!cache->hash_table[i]) + continue; + entry = cache->hash_table[i]; + const uint32_t size = entry_size(entry); + if (end < p + size) { + result = VK_INCOMPLETE; + break; + } + + memcpy(p, entry, size); + for (int j = 0; j < MESA_SHADER_STAGES; ++j) + ((struct cache_entry *) p)->variants[j] = NULL; + p += size; + } + *pDataSize = p - pData; + + pthread_mutex_unlock(&cache->mutex); + return result; +} + +static void +tu_pipeline_cache_merge(struct tu_pipeline_cache *dst, + struct tu_pipeline_cache *src) +{ + for (uint32_t i = 0; i < src->table_size; i++) { + struct cache_entry *entry = src->hash_table[i]; + if (!entry || tu_pipeline_cache_search(dst, entry->sha1)) + continue; + + tu_pipeline_cache_add_entry(dst, entry); + + src->hash_table[i] = NULL; + } +} + +VkResult +tu_MergePipelineCaches(VkDevice _device, + VkPipelineCache destCache, + uint32_t srcCacheCount, + const VkPipelineCache *pSrcCaches) +{ + TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache); + + for (uint32_t i = 0; i < srcCacheCount; i++) { + TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]); + + tu_pipeline_cache_merge(dst, src); + } + + return VK_SUCCESS; +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_private.h b/lib/mesa/src/freedreno/vulkan/tu_private.h new file mode 100644 index 000000000..c2440471f --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_private.h @@ -0,0 +1,1556 @@ +/* + * Copyright © 2016 Red Hat. 
+ * Copyright © 2016 Bas Nieuwenhuizen + * + * based in part on anv driver which is: + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef TU_PRIVATE_H +#define TU_PRIVATE_H + +#include <assert.h> +#include <pthread.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#ifdef HAVE_VALGRIND +#include <memcheck.h> +#include <valgrind.h> +#define VG(x) x +#else +#define VG(x) +#endif + +#include "c11/threads.h" +#include "compiler/shader_enums.h" +#include "main/macros.h" +#include "util/list.h" +#include "util/macros.h" +#include "vk_alloc.h" +#include "vk_debug_report.h" +#include "wsi_common.h" + +#include "drm-uapi/msm_drm.h" +#include "ir3/ir3_compiler.h" +#include "ir3/ir3_shader.h" + +#include "adreno_common.xml.h" +#include "adreno_pm4.xml.h" +#include "a6xx.xml.h" + +#include "tu_descriptor_set.h" +#include "tu_extensions.h" + +/* Pre-declarations needed for WSI entrypoints */ +struct wl_surface; +struct wl_display; +typedef struct xcb_connection_t xcb_connection_t; +typedef uint32_t xcb_visualid_t; +typedef uint32_t xcb_window_t; + +#include <vulkan/vk_android_native_buffer.h> +#include <vulkan/vk_icd.h> +#include <vulkan/vulkan.h> +#include <vulkan/vulkan_intel.h> + +#include "tu_entrypoints.h" + +#define MAX_VBS 32 +#define MAX_VERTEX_ATTRIBS 32 +#define MAX_RTS 8 +#define MAX_VSC_PIPES 32 +#define MAX_VIEWPORTS 1 +#define MAX_SCISSORS 16 +#define MAX_DISCARD_RECTANGLES 4 +#define MAX_PUSH_CONSTANTS_SIZE 128 +#define MAX_PUSH_DESCRIPTORS 32 +#define MAX_DYNAMIC_UNIFORM_BUFFERS 16 +#define MAX_DYNAMIC_STORAGE_BUFFERS 8 +#define MAX_DYNAMIC_BUFFERS \ + (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS) +#define MAX_SAMPLES_LOG2 4 +#define NUM_META_FS_KEYS 13 +#define TU_MAX_DRM_DEVICES 8 +#define MAX_VIEWS 8 + +#define NUM_DEPTH_CLEAR_PIPELINES 3 + +/* + * This is the point we switch from using CP to compute shader + * for certain buffer operations. 
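+ * Below this size (presumably in bytes) the command processor is + * expected to be cheaper than dispatching a compute shader.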
+ */ +#define TU_BUFFER_OPS_CS_THRESHOLD 4096 + +enum tu_mem_heap +{ + TU_MEM_HEAP_VRAM, + TU_MEM_HEAP_VRAM_CPU_ACCESS, + TU_MEM_HEAP_GTT, + TU_MEM_HEAP_COUNT +}; + +enum tu_mem_type +{ + TU_MEM_TYPE_VRAM, + TU_MEM_TYPE_GTT_WRITE_COMBINE, + TU_MEM_TYPE_VRAM_CPU_ACCESS, + TU_MEM_TYPE_GTT_CACHED, + TU_MEM_TYPE_COUNT +}; + +#define tu_printflike(a, b) __attribute__((__format__(__printf__, a, b))) + +static inline uint32_t +align_u32(uint32_t v, uint32_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +static inline uint32_t +align_u32_npot(uint32_t v, uint32_t a) +{ + return (v + a - 1) / a * a; +} + +static inline uint64_t +align_u64(uint64_t v, uint64_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +static inline int32_t +align_i32(int32_t v, int32_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +/** Alignment must be a power of 2. */ +static inline bool +tu_is_aligned(uintmax_t n, uintmax_t a) +{ + assert(a == (a & -a)); + return (n & (a - 1)) == 0; +} + +static inline uint32_t +round_up_u32(uint32_t v, uint32_t a) +{ + return (v + a - 1) / a; +} + +static inline uint64_t +round_up_u64(uint64_t v, uint64_t a) +{ + return (v + a - 1) / a; +} + +static inline uint32_t +tu_minify(uint32_t n, uint32_t levels) +{ + if (unlikely(n == 0)) + return 0; + else + return MAX2(n >> levels, 1); +} +static inline float +tu_clamp_f(float f, float min, float max) +{ + assert(min < max); + + if (f > max) + return max; + else if (f < min) + return min; + else + return f; +} + +static inline bool +tu_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) +{ + if (*inout_mask & clear_mask) { + *inout_mask &= ~clear_mask; + return true; + } else { + return false; + } +} + +#define for_each_bit(b, dword) \ + for (uint32_t __dword = (dword); \ + (b) = __builtin_ffs(__dword) - 1, __dword; __dword &= ~(1 << (b))) + +#define typed_memcpy(dest, src, count) \ + ({ \ + STATIC_ASSERT(sizeof(*src) == sizeof(*dest)); \ + memcpy((dest), (src), (count) * sizeof(*(src))); \ + }) + +/* Whenever we generate an error, pass it through this function. Useful for + * debugging, where we can break on it. Only call at error site, not when + * propagating errors. Might be useful to plug in a stack trace here. + */ + +struct tu_instance; + +VkResult +__vk_errorf(struct tu_instance *instance, + VkResult error, + const char *file, + int line, + const char *format, + ...); + +#define vk_error(instance, error) \ + __vk_errorf(instance, error, __FILE__, __LINE__, NULL); +#define vk_errorf(instance, error, format, ...) \ + __vk_errorf(instance, error, __FILE__, __LINE__, format, ##__VA_ARGS__); + +void +__tu_finishme(const char *file, int line, const char *format, ...) + tu_printflike(3, 4); +void +tu_loge(const char *format, ...) tu_printflike(1, 2); +void +tu_loge_v(const char *format, va_list va); +void +tu_logi(const char *format, ...) tu_printflike(1, 2); +void +tu_logi_v(const char *format, va_list va); + +/** + * Print a FINISHME message, including its source location. + */ +#define tu_finishme(format, ...) \ + do { \ + static bool reported = false; \ + if (!reported) { \ + __tu_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \ + reported = true; \ + } \ + } while (0) + +/* A non-fatal assert. Useful for debugging. 
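+ * Unlike assert(3), a failed tu_assert() only logs the location and the + * failing expression to stderr and continues; outside DEBUG builds it + * compiles away entirely.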
+ */ +#ifdef DEBUG +#define tu_assert(x) \ + ({ \ + if (unlikely(!(x))) \ + fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \ + }) +#else +#define tu_assert(x) +#endif + +/* Suppress -Wunused in stub functions */ +#define tu_use_args(...) __tu_use_args(0, ##__VA_ARGS__) +static inline void +__tu_use_args(int ignore, ...) +{ +} + +#define tu_stub() \ + do { \ + tu_finishme("stub %s", __func__); \ + } while (0) + +void * +tu_lookup_entrypoint_unchecked(const char *name); +void * +tu_lookup_entrypoint_checked( + const char *name, + uint32_t core_version, + const struct tu_instance_extension_table *instance, + const struct tu_device_extension_table *device); + +struct tu_physical_device +{ + VK_LOADER_DATA _loader_data; + + struct tu_instance *instance; + + char path[20]; + char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE]; + uint8_t driver_uuid[VK_UUID_SIZE]; + uint8_t device_uuid[VK_UUID_SIZE]; + uint8_t cache_uuid[VK_UUID_SIZE]; + + struct wsi_device wsi_device; + + int local_fd; + int master_fd; + + unsigned gpu_id; + uint32_t gmem_size; + uint32_t tile_align_w; + uint32_t tile_align_h; + + /* This is the driver's on-disk cache, used as a fallback as opposed to + * the pipeline cache defined by apps. + */ + struct disk_cache *disk_cache; + + struct tu_device_extension_table supported_extensions; +}; + +enum tu_debug_flags +{ + TU_DEBUG_STARTUP = 1 << 0, + TU_DEBUG_NIR = 1 << 1, + TU_DEBUG_IR3 = 1 << 2, +}; + +struct tu_instance +{ + VK_LOADER_DATA _loader_data; + + VkAllocationCallbacks alloc; + + uint32_t api_version; + int physical_device_count; + struct tu_physical_device physical_devices[TU_MAX_DRM_DEVICES]; + + enum tu_debug_flags debug_flags; + + struct vk_debug_report_instance debug_report_callbacks; + + struct tu_instance_extension_table enabled_extensions; +}; + +VkResult +tu_wsi_init(struct tu_physical_device *physical_device); +void +tu_wsi_finish(struct tu_physical_device *physical_device); + +bool +tu_instance_extension_supported(const char *name); +uint32_t +tu_physical_device_api_version(struct tu_physical_device *dev); +bool +tu_physical_device_extension_supported(struct tu_physical_device *dev, + const char *name); + +struct cache_entry; + +struct tu_pipeline_cache +{ + struct tu_device *device; + pthread_mutex_t mutex; + + uint32_t total_size; + uint32_t table_size; + uint32_t kernel_count; + struct cache_entry **hash_table; + bool modified; + + VkAllocationCallbacks alloc; +}; + +struct tu_pipeline_key +{ +}; + +void +tu_pipeline_cache_init(struct tu_pipeline_cache *cache, + struct tu_device *device); +void +tu_pipeline_cache_finish(struct tu_pipeline_cache *cache); +void +tu_pipeline_cache_load(struct tu_pipeline_cache *cache, + const void *data, + size_t size); + +struct tu_shader_variant; + +bool +tu_create_shader_variants_from_pipeline_cache( + struct tu_device *device, + struct tu_pipeline_cache *cache, + const unsigned char *sha1, + struct tu_shader_variant **variants); + +void +tu_pipeline_cache_insert_shaders(struct tu_device *device, + struct tu_pipeline_cache *cache, + const unsigned char *sha1, + struct tu_shader_variant **variants, + const void *const *codes, + const unsigned *code_sizes); + +struct tu_meta_state +{ + VkAllocationCallbacks alloc; + + struct tu_pipeline_cache cache; +}; + +/* queue types */ +#define TU_QUEUE_GENERAL 0 + +#define TU_MAX_QUEUE_FAMILIES 1 + +struct tu_fence +{ + bool signaled; + int fd; +}; + +void +tu_fence_init(struct tu_fence *fence, bool signaled); +void +tu_fence_finish(struct tu_fence *fence); +void 
+tu_fence_update_fd(struct tu_fence *fence, int fd); +void +tu_fence_copy(struct tu_fence *fence, const struct tu_fence *src); +void +tu_fence_signal(struct tu_fence *fence); +void +tu_fence_wait_idle(struct tu_fence *fence); + +struct tu_queue +{ + VK_LOADER_DATA _loader_data; + struct tu_device *device; + uint32_t queue_family_index; + int queue_idx; + VkDeviceQueueCreateFlags flags; + + uint32_t msm_queue_id; + struct tu_fence submit_fence; +}; + +struct tu_device +{ + VK_LOADER_DATA _loader_data; + + VkAllocationCallbacks alloc; + + struct tu_instance *instance; + + struct tu_meta_state meta_state; + + struct tu_queue *queues[TU_MAX_QUEUE_FAMILIES]; + int queue_count[TU_MAX_QUEUE_FAMILIES]; + + struct tu_physical_device *physical_device; + + struct ir3_compiler *compiler; + + /* Backup in-memory cache to be used if the app doesn't provide one */ + struct tu_pipeline_cache *mem_cache; + + struct list_head shader_slabs; + mtx_t shader_slab_mutex; + + struct tu_device_extension_table enabled_extensions; +}; + +struct tu_bo +{ + uint32_t gem_handle; + uint64_t size; + uint64_t iova; + void *map; +}; + +VkResult +tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size); +VkResult +tu_bo_init_dmabuf(struct tu_device *dev, + struct tu_bo *bo, + uint64_t size, + int fd); +int +tu_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo); +void +tu_bo_finish(struct tu_device *dev, struct tu_bo *bo); +VkResult +tu_bo_map(struct tu_device *dev, struct tu_bo *bo); + +struct tu_cs_entry +{ + /* No ownership */ + const struct tu_bo *bo; + + uint32_t size; + uint32_t offset; +}; + +enum tu_cs_mode +{ + + /* + * A command stream in TU_CS_MODE_GROW mode grows automatically whenever it + * is full. tu_cs_begin must be called before command packet emission and + * tu_cs_end must be called after. + * + * This mode may create multiple entries internally. The entries must be + * submitted together. + */ + TU_CS_MODE_GROW, + + /* + * A command stream in TU_CS_MODE_EXTERNAL mode wraps an external, + * fixed-size buffer. tu_cs_begin and tu_cs_end are optional and have no + * effect on it. + * + * This mode does not create any entry or any BO. + */ + TU_CS_MODE_EXTERNAL, + + /* + * A command stream in TU_CS_MODE_SUB_STREAM mode does not support direct + * command packet emission. tu_cs_begin_sub_stream must be called to get a + * sub-stream to emit command packets to. When done with the sub-stream, + * tu_cs_end_sub_stream must be called. + * + * This mode does not create any entry internally. 
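+ * + * A minimal usage sketch, mirroring the pipeline builder code in + * tu_pipeline.c ("dev", "cs" and "size" stand in for the real arguments): + * + * struct tu_cs sub; + * tu_cs_begin_sub_stream(dev, &cs, size, &sub); + * ... emit packets into &sub ... + * struct tu_cs_entry ib = tu_cs_end_sub_stream(&cs, &sub);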
+ */ + TU_CS_MODE_SUB_STREAM, +}; + +struct tu_cs +{ + uint32_t *start; + uint32_t *cur; + uint32_t *reserved_end; + uint32_t *end; + + enum tu_cs_mode mode; + uint32_t next_bo_size; + + struct tu_cs_entry *entries; + uint32_t entry_count; + uint32_t entry_capacity; + + struct tu_bo **bos; + uint32_t bo_count; + uint32_t bo_capacity; +}; + +struct tu_device_memory +{ + struct tu_bo bo; + VkDeviceSize size; + + /* for dedicated allocations */ + struct tu_image *image; + struct tu_buffer *buffer; + + uint32_t type_index; + void *map; + void *user_ptr; +}; + +struct tu_descriptor_range +{ + uint64_t va; + uint32_t size; +}; + +struct tu_descriptor_set +{ + const struct tu_descriptor_set_layout *layout; + uint32_t size; + + uint64_t va; + uint32_t *mapped_ptr; + struct tu_descriptor_range *dynamic_descriptors; +}; + +struct tu_push_descriptor_set +{ + struct tu_descriptor_set set; + uint32_t capacity; +}; + +struct tu_descriptor_pool_entry +{ + uint32_t offset; + uint32_t size; + struct tu_descriptor_set *set; +}; + +struct tu_descriptor_pool +{ + uint8_t *mapped_ptr; + uint64_t current_offset; + uint64_t size; + + uint8_t *host_memory_base; + uint8_t *host_memory_ptr; + uint8_t *host_memory_end; + + uint32_t entry_count; + uint32_t max_entry_count; + struct tu_descriptor_pool_entry entries[0]; +}; + +struct tu_descriptor_update_template_entry +{ + VkDescriptorType descriptor_type; + + /* The number of descriptors to update */ + uint32_t descriptor_count; + + /* Into mapped_ptr or dynamic_descriptors, in units of the respective array + */ + uint32_t dst_offset; + + /* In dwords. Not valid/used for dynamic descriptors */ + uint32_t dst_stride; + + uint32_t buffer_offset; + + /* Only valid for combined image samplers and samplers */ + uint16_t has_sampler; + + /* In bytes */ + size_t src_offset; + size_t src_stride; + + /* For push descriptors */ + const uint32_t *immutable_samplers; +}; + +struct tu_descriptor_update_template +{ + uint32_t entry_count; + VkPipelineBindPoint bind_point; + struct tu_descriptor_update_template_entry entry[0]; +}; + +struct tu_buffer +{ + VkDeviceSize size; + + VkBufferUsageFlags usage; + VkBufferCreateFlags flags; + + struct tu_bo *bo; + VkDeviceSize bo_offset; +}; + +enum tu_dynamic_state_bits +{ + TU_DYNAMIC_VIEWPORT = 1 << 0, + TU_DYNAMIC_SCISSOR = 1 << 1, + TU_DYNAMIC_LINE_WIDTH = 1 << 2, + TU_DYNAMIC_DEPTH_BIAS = 1 << 3, + TU_DYNAMIC_BLEND_CONSTANTS = 1 << 4, + TU_DYNAMIC_DEPTH_BOUNDS = 1 << 5, + TU_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, + TU_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, + TU_DYNAMIC_STENCIL_REFERENCE = 1 << 8, + TU_DYNAMIC_DISCARD_RECTANGLE = 1 << 9, + TU_DYNAMIC_ALL = (1 << 10) - 1, +}; + +struct tu_vertex_binding +{ + struct tu_buffer *buffer; + VkDeviceSize offset; +}; + +struct tu_viewport_state +{ + uint32_t count; + VkViewport viewports[MAX_VIEWPORTS]; +}; + +struct tu_scissor_state +{ + uint32_t count; + VkRect2D scissors[MAX_SCISSORS]; +}; + +struct tu_discard_rectangle_state +{ + uint32_t count; + VkRect2D rectangles[MAX_DISCARD_RECTANGLES]; +}; + +struct tu_dynamic_state +{ + /** + * Bitmask of (1 << VK_DYNAMIC_STATE_*). + * Defines the set of saved dynamic state. 
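+ * In practice the bits are the tu_dynamic_state_bits values above, + * which follow the core VK_DYNAMIC_STATE_* ordering.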
+ */ + uint32_t mask; + + struct tu_viewport_state viewport; + + struct tu_scissor_state scissor; + + float line_width; + + struct + { + float bias; + float clamp; + float slope; + } depth_bias; + + float blend_constants[4]; + + struct + { + float min; + float max; + } depth_bounds; + + struct + { + uint32_t front; + uint32_t back; + } stencil_compare_mask; + + struct + { + uint32_t front; + uint32_t back; + } stencil_write_mask; + + struct + { + uint32_t front; + uint32_t back; + } stencil_reference; + + struct tu_discard_rectangle_state discard_rectangle; +}; + +extern const struct tu_dynamic_state default_dynamic_state; + +const char * +tu_get_debug_option_name(int id); + +const char * +tu_get_perftest_option_name(int id); + +/** + * Attachment state when recording a renderpass instance. + * + * The clear value is valid only if there exists a pending clear. + */ +struct tu_attachment_state +{ + VkImageAspectFlags pending_clear_aspects; + uint32_t cleared_views; + VkClearValue clear_value; + VkImageLayout current_layout; +}; + +struct tu_descriptor_state +{ + struct tu_descriptor_set *sets[MAX_SETS]; + uint32_t dirty; + uint32_t valid; + struct tu_push_descriptor_set push_set; + bool push_dirty; + uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS]; +}; + +struct tu_tile +{ + uint8_t pipe; + uint8_t slot; + VkOffset2D begin; + VkOffset2D end; +}; + +struct tu_tiling_config +{ + VkRect2D render_area; + uint32_t buffer_cpp[MAX_RTS + 2]; + uint32_t buffer_count; + + /* position and size of the first tile */ + VkRect2D tile0; + /* number of tiles */ + VkExtent2D tile_count; + + uint32_t gmem_offsets[MAX_RTS + 2]; + + /* size of the first VSC pipe */ + VkExtent2D pipe0; + /* number of VSC pipes */ + VkExtent2D pipe_count; + + /* pipe register values */ + uint32_t pipe_config[MAX_VSC_PIPES]; + uint32_t pipe_sizes[MAX_VSC_PIPES]; +}; + +enum tu_cmd_dirty_bits +{ + TU_CMD_DIRTY_PIPELINE = 1 << 0, + TU_CMD_DIRTY_VERTEX_BUFFERS = 1 << 1, + + TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 16, + TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 17, + TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 18, + TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 19, +}; + +struct tu_cmd_state +{ + uint32_t dirty; + + struct tu_pipeline *pipeline; + + /* Vertex buffers */ + struct + { + struct tu_buffer *buffers[MAX_VBS]; + VkDeviceSize offsets[MAX_VBS]; + } vb; + + struct tu_dynamic_state dynamic; + + /* Index buffer */ + struct tu_buffer *index_buffer; + uint64_t index_offset; + uint32_t index_type; + uint32_t max_index_count; + uint64_t index_va; + + const struct tu_render_pass *pass; + const struct tu_subpass *subpass; + const struct tu_framebuffer *framebuffer; + struct tu_attachment_state *attachments; + + struct tu_tiling_config tiling_config; + + struct tu_cs_entry tile_load_ib; + struct tu_cs_entry tile_store_ib; +}; + +struct tu_cmd_pool +{ + VkAllocationCallbacks alloc; + struct list_head cmd_buffers; + struct list_head free_cmd_buffers; + uint32_t queue_family_index; +}; + +struct tu_cmd_buffer_upload +{ + uint8_t *map; + unsigned offset; + uint64_t size; + struct list_head list; +}; + +enum tu_cmd_buffer_status +{ + TU_CMD_BUFFER_STATUS_INVALID, + TU_CMD_BUFFER_STATUS_INITIAL, + TU_CMD_BUFFER_STATUS_RECORDING, + TU_CMD_BUFFER_STATUS_EXECUTABLE, + TU_CMD_BUFFER_STATUS_PENDING, +}; + +struct tu_bo_list +{ + uint32_t count; + uint32_t capacity; + struct drm_msm_gem_submit_bo *bo_infos; +}; + +#define TU_BO_LIST_FAILED (~0) + +void +tu_bo_list_init(struct tu_bo_list *list); +void +tu_bo_list_destroy(struct tu_bo_list 
*list); +void +tu_bo_list_reset(struct tu_bo_list *list); +uint32_t +tu_bo_list_add(struct tu_bo_list *list, + const struct tu_bo *bo, + uint32_t flags); +VkResult +tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other); + +struct tu_cmd_buffer +{ + VK_LOADER_DATA _loader_data; + + struct tu_device *device; + + struct tu_cmd_pool *pool; + struct list_head pool_link; + + VkCommandBufferUsageFlags usage_flags; + VkCommandBufferLevel level; + enum tu_cmd_buffer_status status; + + struct tu_cmd_state state; + struct tu_vertex_binding vertex_bindings[MAX_VBS]; + uint32_t queue_family_index; + + uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE]; + VkShaderStageFlags push_constant_stages; + struct tu_descriptor_set meta_push_descriptors; + + struct tu_descriptor_state descriptors[VK_PIPELINE_BIND_POINT_RANGE_SIZE]; + + struct tu_cmd_buffer_upload upload; + + VkResult record_result; + + struct tu_bo_list bo_list; + struct tu_cs cs; + struct tu_cs draw_cs; + struct tu_cs tile_cs; + + uint16_t marker_reg; + uint32_t marker_seqno; + + struct tu_bo scratch_bo; + uint32_t scratch_seqno; + + bool wait_for_idle; +}; + +void +tu6_emit_event_write(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + enum vgt_event_type event, + bool need_seqno); + +bool +tu_get_memory_fd(struct tu_device *device, + struct tu_device_memory *memory, + int *pFD); + +/* + * Takes x,y,z as exact numbers of invocations, instead of blocks. + * + * Limitations: Can't call normal dispatch functions without binding or + * rebinding + * the compute pipeline. + */ +void +tu_unaligned_dispatch(struct tu_cmd_buffer *cmd_buffer, + uint32_t x, + uint32_t y, + uint32_t z); + +struct tu_event +{ + uint64_t *map; +}; + +struct tu_shader_module; + +#define TU_HASH_SHADER_IS_GEOM_COPY_SHADER (1 << 0) +#define TU_HASH_SHADER_SISCHED (1 << 1) +#define TU_HASH_SHADER_UNSAFE_MATH (1 << 2) +void +tu_hash_shaders(unsigned char *hash, + const VkPipelineShaderStageCreateInfo **stages, + const struct tu_pipeline_layout *layout, + const struct tu_pipeline_key *key, + uint32_t flags); + +static inline gl_shader_stage +vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) +{ + assert(__builtin_popcount(vk_stage) == 1); + return ffs(vk_stage) - 1; +} + +static inline VkShaderStageFlagBits +mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) +{ + return (1 << mesa_stage); +} + +#define TU_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1) + +#define tu_foreach_stage(stage, stage_bits) \ + for (gl_shader_stage stage, \ + __tmp = (gl_shader_stage)((stage_bits) &TU_STAGE_MASK); \ + stage = __builtin_ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage))) + +struct tu_shader_module +{ + unsigned char sha1[20]; + + uint32_t code_size; + const uint32_t *code[0]; +}; + +struct tu_shader_compile_options +{ + struct ir3_shader_key key; + + bool optimize; + bool include_binning_pass; +}; + +struct tu_shader +{ + struct ir3_shader ir3_shader; + + /* This may be true for vertex shaders. When true, variants[1] is the + * binning variant and binning_binary is non-NULL. 
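+    *
+    * A variant lookup following this convention (a sketch grounded in
+    * tu_shader_compile and tu_shader_destroy in tu_shader.c, not a
+    * helper that exists in this header) would be:
+    *
+    *    const struct ir3_shader_variant *v =
+    *       binning ? &shader->variants[1] : &shader->variants[0];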
+ */ + bool has_binning_pass; + + void *binary; + void *binning_binary; + + struct ir3_shader_variant variants[0]; +}; + +struct tu_shader * +tu_shader_create(struct tu_device *dev, + gl_shader_stage stage, + const VkPipelineShaderStageCreateInfo *stage_info, + const VkAllocationCallbacks *alloc); + +void +tu_shader_destroy(struct tu_device *dev, + struct tu_shader *shader, + const VkAllocationCallbacks *alloc); + +void +tu_shader_compile_options_init( + struct tu_shader_compile_options *options, + const VkGraphicsPipelineCreateInfo *pipeline_info); + +VkResult +tu_shader_compile(struct tu_device *dev, + struct tu_shader *shader, + const struct tu_shader *next_stage, + const struct tu_shader_compile_options *options, + const VkAllocationCallbacks *alloc); + +struct tu_pipeline +{ + struct tu_cs cs; + + struct tu_dynamic_state dynamic_state; + + struct tu_pipeline_layout *layout; + + bool need_indirect_descriptor_sets; + VkShaderStageFlags active_stages; + + struct + { + struct tu_bo binary_bo; + struct tu_cs_entry state_ib; + struct tu_cs_entry binning_state_ib; + } program; + + struct + { + uint8_t bindings[MAX_VERTEX_ATTRIBS]; + uint16_t strides[MAX_VERTEX_ATTRIBS]; + uint16_t offsets[MAX_VERTEX_ATTRIBS]; + uint32_t count; + + uint8_t binning_bindings[MAX_VERTEX_ATTRIBS]; + uint16_t binning_strides[MAX_VERTEX_ATTRIBS]; + uint16_t binning_offsets[MAX_VERTEX_ATTRIBS]; + uint32_t binning_count; + + struct tu_cs_entry state_ib; + struct tu_cs_entry binning_state_ib; + } vi; + + struct + { + enum pc_di_primtype primtype; + bool primitive_restart; + } ia; + + struct + { + struct tu_cs_entry state_ib; + } vp; + + struct + { + uint32_t gras_su_cntl; + struct tu_cs_entry state_ib; + } rast; + + struct + { + struct tu_cs_entry state_ib; + } ds; + + struct + { + struct tu_cs_entry state_ib; + } blend; +}; + +void +tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport); + +void +tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor); + +void +tu6_emit_gras_su_cntl(struct tu_cs *cs, + uint32_t gras_su_cntl, + float line_width); + +void +tu6_emit_depth_bias(struct tu_cs *cs, + float constant_factor, + float clamp, + float slope_factor); + +void +tu6_emit_stencil_compare_mask(struct tu_cs *cs, + uint32_t front, + uint32_t back); + +void +tu6_emit_stencil_write_mask(struct tu_cs *cs, uint32_t front, uint32_t back); + +void +tu6_emit_stencil_reference(struct tu_cs *cs, uint32_t front, uint32_t back); + +void +tu6_emit_blend_constants(struct tu_cs *cs, const float constants[4]); + +struct tu_userdata_info * +tu_lookup_user_sgpr(struct tu_pipeline *pipeline, + gl_shader_stage stage, + int idx); + +struct tu_shader_variant * +tu_get_shader(struct tu_pipeline *pipeline, gl_shader_stage stage); + +struct tu_graphics_pipeline_create_info +{ + bool use_rectlist; + bool db_depth_clear; + bool db_stencil_clear; + bool db_depth_disable_expclear; + bool db_stencil_disable_expclear; + bool db_flush_depth_inplace; + bool db_flush_stencil_inplace; + bool db_resummarize; + uint32_t custom_blend_mode; +}; + +struct tu_native_format +{ + int vtx; /* VFMTn_xxx or -1 */ + int tex; /* TFMTn_xxx or -1 */ + int rb; /* RBn_xxx or -1 */ + int swap; /* enum a3xx_color_swap */ + bool present; /* internal only; always true to external users */ +}; + +const struct tu_native_format * +tu6_get_native_format(VkFormat format); + +int +tu_pack_clear_value(const VkClearValue *val, + VkFormat format, + uint32_t buf[4]); +enum a6xx_2d_ifmt tu6_rb_fmt_to_ifmt(enum a6xx_color_fmt fmt); + +struct tu_image_level +{ + VkDeviceSize 
offset;
+   VkDeviceSize size;
+   uint32_t pitch;
+};
+
+struct tu_image
+{
+   VkImageType type;
+   /* The original VkFormat provided by the client. This may not match any
+    * of the actual surface formats.
+    */
+   VkFormat vk_format;
+   VkImageAspectFlags aspects;
+   VkImageUsageFlags usage;  /**< Superset of VkImageCreateInfo::usage. */
+   VkImageTiling tiling;     /**< VkImageCreateInfo::tiling */
+   VkImageCreateFlags flags; /**< VkImageCreateInfo::flags */
+   VkExtent3D extent;
+   uint32_t level_count;
+   uint32_t layer_count;
+
+   VkDeviceSize size;
+   uint32_t alignment;
+
+   /* memory layout */
+   VkDeviceSize layer_size;
+   struct tu_image_level levels[15];
+   unsigned tile_mode;
+
+   unsigned queue_family_mask;
+   bool exclusive;
+   bool shareable;
+
+   /* For VK_ANDROID_native_buffer, the WSI image owns the memory. */
+   VkDeviceMemory owned_memory;
+
+   /* Set when bound */
+   const struct tu_bo *bo;
+   VkDeviceSize bo_offset;
+};
+
+unsigned
+tu_image_queue_family_mask(const struct tu_image *image,
+                           uint32_t family,
+                           uint32_t queue_family);
+
+static inline uint32_t
+tu_get_layerCount(const struct tu_image *image,
+                  const VkImageSubresourceRange *range)
+{
+   return range->layerCount == VK_REMAINING_ARRAY_LAYERS
+             ? image->layer_count - range->baseArrayLayer
+             : range->layerCount;
+}
+
+static inline uint32_t
+tu_get_levelCount(const struct tu_image *image,
+                  const VkImageSubresourceRange *range)
+{
+   return range->levelCount == VK_REMAINING_MIP_LEVELS
+             ? image->level_count - range->baseMipLevel
+             : range->levelCount;
+}
+
+struct tu_image_view
+{
+   struct tu_image *image; /**< VkImageViewCreateInfo::image */
+
+   VkImageViewType type;
+   VkImageAspectFlags aspect_mask;
+   VkFormat vk_format;
+   uint32_t base_layer;
+   uint32_t layer_count;
+   uint32_t base_mip;
+   uint32_t level_count;
+   VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */
+
+   uint32_t descriptor[16];
+
+   /* Descriptor for use as a storage image as opposed to a sampled image.
+    * This has a few differences for cube maps (e.g. type).
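+    * (One assumed example, not verified against the a6xx descriptor
+    * layout: a cube map is typically described as a 2D array for storage
+    * use, since image load/store does not address faces with cube
+    * coordinates.)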
+ */ + uint32_t storage_descriptor[16]; +}; + +struct tu_sampler +{ +}; + +struct tu_image_create_info +{ + const VkImageCreateInfo *vk_info; + bool scanout; + bool no_metadata_planes; +}; + +VkResult +tu_image_create(VkDevice _device, + const struct tu_image_create_info *info, + const VkAllocationCallbacks *alloc, + VkImage *pImage); + +VkResult +tu_image_from_gralloc(VkDevice device_h, + const VkImageCreateInfo *base_info, + const VkNativeBufferANDROID *gralloc_info, + const VkAllocationCallbacks *alloc, + VkImage *out_image_h); + +void +tu_image_view_init(struct tu_image_view *view, + struct tu_device *device, + const VkImageViewCreateInfo *pCreateInfo); + +struct tu_buffer_view +{ + VkFormat vk_format; + uint64_t range; /**< VkBufferViewCreateInfo::range */ + uint32_t state[4]; +}; +void +tu_buffer_view_init(struct tu_buffer_view *view, + struct tu_device *device, + const VkBufferViewCreateInfo *pCreateInfo); + +static inline struct VkExtent3D +tu_sanitize_image_extent(const VkImageType imageType, + const struct VkExtent3D imageExtent) +{ + switch (imageType) { + case VK_IMAGE_TYPE_1D: + return (VkExtent3D) { imageExtent.width, 1, 1 }; + case VK_IMAGE_TYPE_2D: + return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 }; + case VK_IMAGE_TYPE_3D: + return imageExtent; + default: + unreachable("invalid image type"); + } +} + +static inline struct VkOffset3D +tu_sanitize_image_offset(const VkImageType imageType, + const struct VkOffset3D imageOffset) +{ + switch (imageType) { + case VK_IMAGE_TYPE_1D: + return (VkOffset3D) { imageOffset.x, 0, 0 }; + case VK_IMAGE_TYPE_2D: + return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 }; + case VK_IMAGE_TYPE_3D: + return imageOffset; + default: + unreachable("invalid image type"); + } +} + +struct tu_attachment_info +{ + struct tu_image_view *attachment; +}; + +struct tu_framebuffer +{ + uint32_t width; + uint32_t height; + uint32_t layers; + + uint32_t attachment_count; + struct tu_attachment_info attachments[0]; +}; + +struct tu_subpass_barrier +{ + VkPipelineStageFlags src_stage_mask; + VkAccessFlags src_access_mask; + VkAccessFlags dst_access_mask; +}; + +void +tu_subpass_barrier(struct tu_cmd_buffer *cmd_buffer, + const struct tu_subpass_barrier *barrier); + +struct tu_subpass_attachment +{ + uint32_t attachment; + VkImageLayout layout; +}; + +struct tu_subpass +{ + uint32_t input_count; + uint32_t color_count; + struct tu_subpass_attachment *input_attachments; + struct tu_subpass_attachment *color_attachments; + struct tu_subpass_attachment *resolve_attachments; + struct tu_subpass_attachment depth_stencil_attachment; + + /** Subpass has at least one resolve attachment */ + bool has_resolve; + + struct tu_subpass_barrier start_barrier; + + uint32_t view_mask; + VkSampleCountFlagBits max_sample_count; +}; + +struct tu_render_pass_attachment +{ + VkFormat format; + uint32_t samples; + VkAttachmentLoadOp load_op; + VkAttachmentLoadOp stencil_load_op; + VkImageLayout initial_layout; + VkImageLayout final_layout; + uint32_t view_mask; +}; + +struct tu_render_pass +{ + uint32_t attachment_count; + uint32_t subpass_count; + struct tu_subpass_attachment *subpass_attachments; + struct tu_render_pass_attachment *attachments; + struct tu_subpass_barrier end_barrier; + struct tu_subpass subpasses[0]; +}; + +VkResult +tu_device_init_meta(struct tu_device *device); +void +tu_device_finish_meta(struct tu_device *device); + +struct tu_query_pool +{ + uint32_t stride; + uint32_t availability_offset; + uint64_t size; + char *ptr; + VkQueryType type; + 
uint32_t pipeline_stats_mask; +}; + +struct tu_semaphore +{ + uint32_t syncobj; + uint32_t temp_syncobj; +}; + +void +tu_set_descriptor_set(struct tu_cmd_buffer *cmd_buffer, + VkPipelineBindPoint bind_point, + struct tu_descriptor_set *set, + unsigned idx); + +void +tu_update_descriptor_sets(struct tu_device *device, + struct tu_cmd_buffer *cmd_buffer, + VkDescriptorSet overrideSet, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet *pDescriptorWrites, + uint32_t descriptorCopyCount, + const VkCopyDescriptorSet *pDescriptorCopies); + +void +tu_update_descriptor_set_with_template( + struct tu_device *device, + struct tu_cmd_buffer *cmd_buffer, + struct tu_descriptor_set *set, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, + const void *pData); + +void +tu_meta_push_descriptor_set(struct tu_cmd_buffer *cmd_buffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + uint32_t set, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet *pDescriptorWrites); + +int +tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id); + +int +tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size); + +int +tu_drm_submitqueue_new(const struct tu_device *dev, + int priority, + uint32_t *queue_id); + +void +tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id); + +uint32_t +tu_gem_new(const struct tu_device *dev, uint64_t size, uint32_t flags); +uint32_t +tu_gem_import_dmabuf(const struct tu_device *dev, + int prime_fd, + uint64_t size); +int +tu_gem_export_dmabuf(const struct tu_device *dev, uint32_t gem_handle); +void +tu_gem_close(const struct tu_device *dev, uint32_t gem_handle); +uint64_t +tu_gem_info_offset(const struct tu_device *dev, uint32_t gem_handle); +uint64_t +tu_gem_info_iova(const struct tu_device *dev, uint32_t gem_handle); + +#define TU_DEFINE_HANDLE_CASTS(__tu_type, __VkType) \ + \ + static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \ + { \ + return (struct __tu_type *) _handle; \ + } \ + \ + static inline __VkType __tu_type##_to_handle(struct __tu_type *_obj) \ + { \ + return (__VkType) _obj; \ + } + +#define TU_DEFINE_NONDISP_HANDLE_CASTS(__tu_type, __VkType) \ + \ + static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \ + { \ + return (struct __tu_type *) (uintptr_t) _handle; \ + } \ + \ + static inline __VkType __tu_type##_to_handle(struct __tu_type *_obj) \ + { \ + return (__VkType)(uintptr_t) _obj; \ + } + +#define TU_FROM_HANDLE(__tu_type, __name, __handle) \ + struct __tu_type *__name = __tu_type##_from_handle(__handle) + +TU_DEFINE_HANDLE_CASTS(tu_cmd_buffer, VkCommandBuffer) +TU_DEFINE_HANDLE_CASTS(tu_device, VkDevice) +TU_DEFINE_HANDLE_CASTS(tu_instance, VkInstance) +TU_DEFINE_HANDLE_CASTS(tu_physical_device, VkPhysicalDevice) +TU_DEFINE_HANDLE_CASTS(tu_queue, VkQueue) + +TU_DEFINE_NONDISP_HANDLE_CASTS(tu_cmd_pool, VkCommandPool) +TU_DEFINE_NONDISP_HANDLE_CASTS(tu_buffer, VkBuffer) +TU_DEFINE_NONDISP_HANDLE_CASTS(tu_buffer_view, VkBufferView) +TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_pool, VkDescriptorPool) +TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_set, VkDescriptorSet) +TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_set_layout, + VkDescriptorSetLayout) +TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_update_template, + VkDescriptorUpdateTemplate) +TU_DEFINE_NONDISP_HANDLE_CASTS(tu_device_memory, VkDeviceMemory) +TU_DEFINE_NONDISP_HANDLE_CASTS(tu_fence, VkFence) +TU_DEFINE_NONDISP_HANDLE_CASTS(tu_event, VkEvent) 
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_framebuffer, VkFramebuffer)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_image, VkImage)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_image_view, VkImageView)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_cache, VkPipelineCache)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline, VkPipeline)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_layout, VkPipelineLayout)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_query_pool, VkQueryPool)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_render_pass, VkRenderPass)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_sampler, VkSampler)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_shader_module, VkShaderModule)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_semaphore, VkSemaphore)
+
+#endif /* TU_PRIVATE_H */
diff --git a/lib/mesa/src/freedreno/vulkan/tu_query.c b/lib/mesa/src/freedreno/vulkan/tu_query.c
new file mode 100644
index 000000000..2cb710fb1
--- /dev/null
+++ b/lib/mesa/src/freedreno/vulkan/tu_query.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ * Based on anv:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */ + +#include "tu_private.h" + +#include <assert.h> +#include <fcntl.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> + +#include "nir/nir_builder.h" + +VkResult +tu_CreateQueryPool(VkDevice _device, + const VkQueryPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkQueryPool *pQueryPool) +{ + TU_FROM_HANDLE(tu_device, device, _device); + struct tu_query_pool *pool = + vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (!pool) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + *pQueryPool = tu_query_pool_to_handle(pool); + return VK_SUCCESS; +} + +void +tu_DestroyQueryPool(VkDevice _device, + VkQueryPool _pool, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_query_pool, pool, _pool); + + if (!pool) + return; + + vk_free2(&device->alloc, pAllocator, pool); +} + +VkResult +tu_GetQueryPoolResults(VkDevice _device, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + size_t dataSize, + void *pData, + VkDeviceSize stride, + VkQueryResultFlags flags) +{ + return VK_SUCCESS; +} + +void +tu_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize stride, + VkQueryResultFlags flags) +{ +} + +void +tu_CmdResetQueryPool(VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount) +{ +} + +void +tu_CmdBeginQuery(VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + VkQueryControlFlags flags) +{ +} + +void +tu_CmdEndQuery(VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query) +{ +} + +void +tu_CmdWriteTimestamp(VkCommandBuffer commandBuffer, + VkPipelineStageFlagBits pipelineStage, + VkQueryPool queryPool, + uint32_t query) +{ +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_shader.c b/lib/mesa/src/freedreno/vulkan/tu_shader.c new file mode 100644 index 000000000..f6e13d7c4 --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_shader.c @@ -0,0 +1,336 @@ +/* + * Copyright © 2019 Google LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "tu_private.h" + +#include "spirv/nir_spirv.h" +#include "util/mesa-sha1.h" + +#include "ir3/ir3_nir.h" + +static nir_shader * +tu_spirv_to_nir(struct ir3_compiler *compiler, + const uint32_t *words, + size_t word_count, + gl_shader_stage stage, + const char *entry_point_name, + const VkSpecializationInfo *spec_info) +{ + /* TODO these are made-up */ + const struct spirv_to_nir_options spirv_options = { + .lower_ubo_ssbo_access_to_offsets = true, + .caps = { false }, + }; + const nir_shader_compiler_options *nir_options = + ir3_get_compiler_options(compiler); + + /* convert VkSpecializationInfo */ + struct nir_spirv_specialization *spec = NULL; + uint32_t num_spec = 0; + if (spec_info && spec_info->mapEntryCount) { + spec = malloc(sizeof(*spec) * spec_info->mapEntryCount); + if (!spec) + return NULL; + + for (uint32_t i = 0; i < spec_info->mapEntryCount; i++) { + const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i]; + const void *data = spec_info->pData + entry->offset; + assert(data + entry->size <= spec_info->pData + spec_info->dataSize); + spec[i].id = entry->constantID; + if (entry->size == 8) + spec[i].data64 = *(const uint64_t *) data; + else + spec[i].data32 = *(const uint32_t *) data; + spec[i].defined_on_module = false; + } + + num_spec = spec_info->mapEntryCount; + } + + nir_shader *nir = + spirv_to_nir(words, word_count, spec, num_spec, stage, entry_point_name, + &spirv_options, nir_options); + + free(spec); + + assert(nir->info.stage == stage); + nir_validate_shader(nir, "after spirv_to_nir"); + + return nir; +} + +static void +tu_sort_variables_by_location(struct exec_list *variables) +{ + struct exec_list sorted; + exec_list_make_empty(&sorted); + + nir_foreach_variable_safe(var, variables) + { + exec_node_remove(&var->node); + + /* insert the variable into the sorted list */ + nir_variable *next = NULL; + nir_foreach_variable(tmp, &sorted) + { + if (var->data.location < tmp->data.location) { + next = tmp; + break; + } + } + if (next) + exec_node_insert_node_before(&next->node, &var->node); + else + exec_list_push_tail(&sorted, &var->node); + } + + exec_list_move_nodes_to(&sorted, variables); +} + +struct tu_shader * +tu_shader_create(struct tu_device *dev, + gl_shader_stage stage, + const VkPipelineShaderStageCreateInfo *stage_info, + const VkAllocationCallbacks *alloc) +{ + const struct tu_shader_module *module = + tu_shader_module_from_handle(stage_info->module); + struct tu_shader *shader; + + const uint32_t max_variant_count = (stage == MESA_SHADER_VERTEX) ? 2 : 1; + shader = vk_zalloc2( + &dev->alloc, alloc, + sizeof(*shader) + sizeof(struct ir3_shader_variant) * max_variant_count, + 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!shader) + return NULL; + + /* translate SPIR-V to NIR */ + assert(module->code_size % 4 == 0); + nir_shader *nir = tu_spirv_to_nir( + dev->compiler, (const uint32_t *) module->code, module->code_size / 4, + stage, stage_info->pName, stage_info->pSpecializationInfo); + if (!nir) { + vk_free2(&dev->alloc, alloc, shader); + return NULL; + } + + if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_NIR)) { + fprintf(stderr, "translated nir:\n"); + nir_print_shader(nir, stderr); + } + + /* TODO what needs to happen? 
 */
+
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+      tu_sort_variables_by_location(&nir->outputs);
+      break;
+   case MESA_SHADER_TESS_CTRL:
+   case MESA_SHADER_TESS_EVAL:
+   case MESA_SHADER_GEOMETRY:
+      tu_sort_variables_by_location(&nir->inputs);
+      tu_sort_variables_by_location(&nir->outputs);
+      break;
+   case MESA_SHADER_FRAGMENT:
+      tu_sort_variables_by_location(&nir->inputs);
+      break;
+   case MESA_SHADER_COMPUTE:
+      break;
+   default:
+      unreachable("invalid gl_shader_stage");
+      break;
+   }
+
+   nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
+                            ir3_glsl_type_size);
+   nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
+                            ir3_glsl_type_size);
+   nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
+                            ir3_glsl_type_size);
+
+   NIR_PASS_V(nir, nir_lower_system_values);
+   NIR_PASS_V(nir, nir_lower_frexp);
+   NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, 0);
+
+   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+   shader->ir3_shader.compiler = dev->compiler;
+   shader->ir3_shader.type = stage;
+   shader->ir3_shader.nir = nir;
+
+   return shader;
+}
+
+void
+tu_shader_destroy(struct tu_device *dev,
+                  struct tu_shader *shader,
+                  const VkAllocationCallbacks *alloc)
+{
+   if (shader->ir3_shader.nir)
+      ralloc_free(shader->ir3_shader.nir);
+
+   for (uint32_t i = 0; i < 1 + shader->has_binning_pass; i++) {
+      if (shader->variants[i].ir)
+         ir3_destroy(shader->variants[i].ir);
+   }
+
+   if (shader->ir3_shader.const_state.immediates)
+      free(shader->ir3_shader.const_state.immediates);
+   if (shader->binary)
+      free(shader->binary);
+   if (shader->binning_binary)
+      free(shader->binning_binary);
+
+   vk_free2(&dev->alloc, alloc, shader);
+}
+
+void
+tu_shader_compile_options_init(
+   struct tu_shader_compile_options *options,
+   const VkGraphicsPipelineCreateInfo *pipeline_info)
+{
+   *options = (struct tu_shader_compile_options) {
+      /* TODO ir3_key */
+
+      .optimize = !(pipeline_info->flags &
+                    VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT),
+      .include_binning_pass = true,
+   };
+}
+
+static uint32_t *
+tu_compile_shader_variant(struct ir3_shader *shader,
+                          const struct ir3_shader_key *key,
+                          bool binning_pass,
+                          struct ir3_shader_variant *variant)
+{
+   variant->shader = shader;
+   variant->type = shader->type;
+   variant->key = *key;
+   variant->binning_pass = binning_pass;
+
+   int ret = ir3_compile_shader_nir(shader->compiler, variant);
+   if (ret)
+      return NULL;
+
+   /* when assembly fails, we rely on tu_shader_destroy to clean up the
+    * variant
+    */
+   return ir3_shader_assemble(variant, shader->compiler->gpu_id);
+}
+
+VkResult
+tu_shader_compile(struct tu_device *dev,
+                  struct tu_shader *shader,
+                  const struct tu_shader *next_stage,
+                  const struct tu_shader_compile_options *options,
+                  const VkAllocationCallbacks *alloc)
+{
+   if (options->optimize) {
+      /* ignore the key for the first pass of optimization */
+      ir3_optimize_nir(&shader->ir3_shader, shader->ir3_shader.nir, NULL);
+
+      if (unlikely(dev->physical_device->instance->debug_flags &
+                   TU_DEBUG_NIR)) {
+         fprintf(stderr, "optimized nir:\n");
+         nir_print_shader(shader->ir3_shader.nir, stderr);
+      }
+   }
+
+   shader->binary = tu_compile_shader_variant(
+      &shader->ir3_shader, &options->key, false, &shader->variants[0]);
+   if (!shader->binary)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   /* compile another variant for the binning pass */
+   if (options->include_binning_pass &&
+       shader->ir3_shader.type == MESA_SHADER_VERTEX) {
+      shader->binning_binary = tu_compile_shader_variant(
+         &shader->ir3_shader, &options->key, true,
&shader->variants[1]); + if (!shader->binning_binary) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + shader->has_binning_pass = true; + } + + if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_IR3)) { + fprintf(stderr, "disassembled ir3:\n"); + fprintf(stderr, "shader: %s\n", + gl_shader_stage_name(shader->ir3_shader.type)); + ir3_shader_disasm(&shader->variants[0], shader->binary, stderr); + + if (shader->has_binning_pass) { + fprintf(stderr, "disassembled ir3:\n"); + fprintf(stderr, "shader: %s (binning)\n", + gl_shader_stage_name(shader->ir3_shader.type)); + ir3_shader_disasm(&shader->variants[1], shader->binning_binary, + stderr); + } + } + + return VK_SUCCESS; +} + +VkResult +tu_CreateShaderModule(VkDevice _device, + const VkShaderModuleCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkShaderModule *pShaderModule) +{ + TU_FROM_HANDLE(tu_device, device, _device); + struct tu_shader_module *module; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + assert(pCreateInfo->codeSize % 4 == 0); + + module = vk_alloc2(&device->alloc, pAllocator, + sizeof(*module) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (module == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + module->code_size = pCreateInfo->codeSize; + memcpy(module->code, pCreateInfo->pCode, pCreateInfo->codeSize); + + _mesa_sha1_compute(module->code, module->code_size, module->sha1); + + *pShaderModule = tu_shader_module_to_handle(module); + + return VK_SUCCESS; +} + +void +tu_DestroyShaderModule(VkDevice _device, + VkShaderModule _module, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + TU_FROM_HANDLE(tu_shader_module, module, _module); + + if (!module) + return; + + vk_free2(&device->alloc, pAllocator, module); +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_util.c b/lib/mesa/src/freedreno/vulkan/tu_util.c new file mode 100644 index 000000000..e630460fb --- /dev/null +++ b/lib/mesa/src/freedreno/vulkan/tu_util.c @@ -0,0 +1,117 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "tu_private.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "util/u_math.h"
+#include "vk_enum_to_str.h"
+
+/* TODO: Add Android support to tu_log funcs */
+
+/** Log an error message. */
+void tu_printflike(1, 2) tu_loge(const char *format, ...)
+{
+   va_list va;
+
+   va_start(va, format);
+   tu_loge_v(format, va);
+   va_end(va);
+}
+
+/** \see tu_loge() */
+void
+tu_loge_v(const char *format, va_list va)
+{
+   fprintf(stderr, "vk: error: ");
+   vfprintf(stderr, format, va);
+   fprintf(stderr, "\n");
+}
+
+/** Log an info message. */
+void tu_printflike(1, 2) tu_logi(const char *format, ...)
+{
+   va_list va;
+
+   va_start(va, format);
+   tu_logi_v(format, va);
+   va_end(va);
+}
+
+/** \see tu_logi() */
+void
+tu_logi_v(const char *format, va_list va)
+{
+   fprintf(stderr, "tu: info: ");
+   vfprintf(stderr, format, va);
+   fprintf(stderr, "\n");
+}
+
+void tu_printflike(3, 4)
+   __tu_finishme(const char *file, int line, const char *format, ...)
+{
+   va_list ap;
+   char buffer[256];
+
+   va_start(ap, format);
+   vsnprintf(buffer, sizeof(buffer), format, ap);
+   va_end(ap);
+
+   fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer);
+}
+
+VkResult
+__vk_errorf(struct tu_instance *instance,
+            VkResult error,
+            const char *file,
+            int line,
+            const char *format,
+            ...)
+{
+   va_list ap;
+   char buffer[256];
+
+   const char *error_str = vk_Result_to_str(error);
+
+#ifndef DEBUG
+   return error;
+#endif
+
+   if (format) {
+      va_start(ap, format);
+      vsnprintf(buffer, sizeof(buffer), format, ap);
+      va_end(ap);
+
+      fprintf(stderr, "%s:%d: %s (%s)\n", file, line, buffer, error_str);
+   } else {
+      fprintf(stderr, "%s:%d: %s\n", file, line, error_str);
+   }
+
+   return error;
+}
diff --git a/lib/mesa/src/freedreno/vulkan/tu_util.h b/lib/mesa/src/freedreno/vulkan/tu_util.h
new file mode 100644
index 000000000..b013079d5
--- /dev/null
+++ b/lib/mesa/src/freedreno/vulkan/tu_util.h
@@ -0,0 +1,11 @@
+#ifndef TU_UTIL_H
+#define TU_UTIL_H
+
+#ifdef HAVE___BUILTIN_POPCOUNT
+#define util_bitcount(i) __builtin_popcount(i)
+#else
+extern unsigned int
+util_bitcount(unsigned int n);
+#endif
+
+#endif /* TU_UTIL_H */
diff --git a/lib/mesa/src/freedreno/vulkan/tu_wsi.c b/lib/mesa/src/freedreno/vulkan/tu_wsi.c
new file mode 100644
index 000000000..21466108b
--- /dev/null
+++ b/lib/mesa/src/freedreno/vulkan/tu_wsi.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright © 2016 Red Hat
+ * based on intel anv code:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "tu_private.h" + +#include "vk_util.h" +#include "wsi_common.h" + +static PFN_vkVoidFunction +tu_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName) +{ + return tu_lookup_entrypoint_unchecked(pName); +} + +VkResult +tu_wsi_init(struct tu_physical_device *physical_device) +{ + return wsi_device_init(&physical_device->wsi_device, + tu_physical_device_to_handle(physical_device), + tu_wsi_proc_addr, &physical_device->instance->alloc, + physical_device->master_fd, NULL); +} + +void +tu_wsi_finish(struct tu_physical_device *physical_device) +{ + wsi_device_finish(&physical_device->wsi_device, + &physical_device->instance->alloc); +} + +void +tu_DestroySurfaceKHR(VkInstance _instance, + VkSurfaceKHR _surface, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_instance, instance, _instance); + ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface); + + vk_free2(&instance->alloc, pAllocator, surface); +} + +VkResult +tu_GetPhysicalDeviceSurfaceSupportKHR(VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + VkSurfaceKHR surface, + VkBool32 *pSupported) +{ + TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); + + return wsi_common_get_surface_support( + &device->wsi_device, queueFamilyIndex, surface, pSupported); +} + +VkResult +tu_GetPhysicalDeviceSurfaceCapabilitiesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + VkSurfaceCapabilitiesKHR *pSurfaceCapabilities) +{ + TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); + + return wsi_common_get_surface_capabilities(&device->wsi_device, surface, + pSurfaceCapabilities); +} + +VkResult +tu_GetPhysicalDeviceSurfaceCapabilities2KHR( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo, + VkSurfaceCapabilities2KHR *pSurfaceCapabilities) +{ + TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); + + return wsi_common_get_surface_capabilities2( + &device->wsi_device, pSurfaceInfo, pSurfaceCapabilities); +} + +VkResult +tu_GetPhysicalDeviceSurfaceCapabilities2EXT( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + VkSurfaceCapabilities2EXT *pSurfaceCapabilities) +{ + TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); + + return wsi_common_get_surface_capabilities2ext( + &device->wsi_device, surface, pSurfaceCapabilities); +} + +VkResult +tu_GetPhysicalDeviceSurfaceFormatsKHR(VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t *pSurfaceFormatCount, + VkSurfaceFormatKHR *pSurfaceFormats) +{ + TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); + + return wsi_common_get_surface_formats( + &device->wsi_device, surface, pSurfaceFormatCount, pSurfaceFormats); +} + +VkResult +tu_GetPhysicalDeviceSurfaceFormats2KHR( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo, + uint32_t *pSurfaceFormatCount, + VkSurfaceFormat2KHR *pSurfaceFormats) +{ + TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); + + return wsi_common_get_surface_formats2(&device->wsi_device, pSurfaceInfo, + pSurfaceFormatCount, + pSurfaceFormats); +} + +VkResult +tu_GetPhysicalDeviceSurfacePresentModesKHR(VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t *pPresentModeCount, + VkPresentModeKHR 
*pPresentModes) +{ + TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); + + return wsi_common_get_surface_present_modes( + &device->wsi_device, surface, pPresentModeCount, pPresentModes); +} + +VkResult +tu_CreateSwapchainKHR(VkDevice _device, + const VkSwapchainCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSwapchainKHR *pSwapchain) +{ + TU_FROM_HANDLE(tu_device, device, _device); + const VkAllocationCallbacks *alloc; + if (pAllocator) + alloc = pAllocator; + else + alloc = &device->alloc; + + return wsi_common_create_swapchain(&device->physical_device->wsi_device, + tu_device_to_handle(device), + pCreateInfo, alloc, pSwapchain); +} + +void +tu_DestroySwapchainKHR(VkDevice _device, + VkSwapchainKHR swapchain, + const VkAllocationCallbacks *pAllocator) +{ + TU_FROM_HANDLE(tu_device, device, _device); + const VkAllocationCallbacks *alloc; + + if (pAllocator) + alloc = pAllocator; + else + alloc = &device->alloc; + + wsi_common_destroy_swapchain(_device, swapchain, alloc); +} + +VkResult +tu_GetSwapchainImagesKHR(VkDevice device, + VkSwapchainKHR swapchain, + uint32_t *pSwapchainImageCount, + VkImage *pSwapchainImages) +{ + return wsi_common_get_images(swapchain, pSwapchainImageCount, + pSwapchainImages); +} + +VkResult +tu_AcquireNextImageKHR(VkDevice device, + VkSwapchainKHR swapchain, + uint64_t timeout, + VkSemaphore semaphore, + VkFence fence, + uint32_t *pImageIndex) +{ + VkAcquireNextImageInfoKHR acquire_info = { + .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR, + .swapchain = swapchain, + .timeout = timeout, + .semaphore = semaphore, + .fence = fence, + .deviceMask = 0, + }; + + return tu_AcquireNextImage2KHR(device, &acquire_info, pImageIndex); +} + +VkResult +tu_AcquireNextImage2KHR(VkDevice _device, + const VkAcquireNextImageInfoKHR *pAcquireInfo, + uint32_t *pImageIndex) +{ + TU_FROM_HANDLE(tu_device, device, _device); + struct tu_physical_device *pdevice = device->physical_device; + + VkResult result = wsi_common_acquire_next_image2( + &pdevice->wsi_device, _device, pAcquireInfo, pImageIndex); + + /* TODO signal fence and semaphore */ + + return result; +} + +VkResult +tu_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo) +{ + TU_FROM_HANDLE(tu_queue, queue, _queue); + return wsi_common_queue_present( + &queue->device->physical_device->wsi_device, + tu_device_to_handle(queue->device), _queue, queue->queue_family_index, + pPresentInfo); +} + +VkResult +tu_GetDeviceGroupPresentCapabilitiesKHR( + VkDevice device, VkDeviceGroupPresentCapabilitiesKHR *pCapabilities) +{ + memset(pCapabilities->presentMask, 0, sizeof(pCapabilities->presentMask)); + pCapabilities->presentMask[0] = 0x1; + pCapabilities->modes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR; + + return VK_SUCCESS; +} + +VkResult +tu_GetDeviceGroupSurfacePresentModesKHR( + VkDevice device, + VkSurfaceKHR surface, + VkDeviceGroupPresentModeFlagsKHR *pModes) +{ + *pModes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR; + + return VK_SUCCESS; +} + +VkResult +tu_GetPhysicalDevicePresentRectanglesKHR(VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t *pRectCount, + VkRect2D *pRects) +{ + TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); + + return wsi_common_get_present_rectangles(&device->wsi_device, surface, + pRectCount, pRects); +} diff --git a/lib/mesa/src/freedreno/vulkan/tu_wsi_wayland.c b/lib/mesa/src/freedreno/vulkan/tu_wsi_wayland.c new file mode 100644 index 000000000..b9148a1e2 --- /dev/null +++ 
b/lib/mesa/src/freedreno/vulkan/tu_wsi_wayland.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright © 2016 Red Hat
+ * based on intel anv code:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "tu_private.h"
+
+#include "wsi_common_wayland.h"
+
+VkBool32
+tu_GetPhysicalDeviceWaylandPresentationSupportKHR(
+   VkPhysicalDevice physicalDevice,
+   uint32_t queueFamilyIndex,
+   struct wl_display *display)
+{
+   TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice);
+
+   return wsi_wl_get_presentation_support(&physical_device->wsi_device,
+                                          display);
+}
+
+VkResult
+tu_CreateWaylandSurfaceKHR(VkInstance _instance,
+                           const VkWaylandSurfaceCreateInfoKHR *pCreateInfo,
+                           const VkAllocationCallbacks *pAllocator,
+                           VkSurfaceKHR *pSurface)
+{
+   TU_FROM_HANDLE(tu_instance, instance, _instance);
+   const VkAllocationCallbacks *alloc;
+   assert(pCreateInfo->sType ==
+          VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR);
+
+   if (pAllocator)
+      alloc = pAllocator;
+   else
+      alloc = &instance->alloc;
+
+   return wsi_create_wl_surface(alloc, pCreateInfo, pSurface);
+}
diff --git a/lib/mesa/src/freedreno/vulkan/vk_format.h b/lib/mesa/src/freedreno/vulkan/vk_format.h
new file mode 100644
index 000000000..4e13bc9c0
--- /dev/null
+++ b/lib/mesa/src/freedreno/vulkan/vk_format.h
@@ -0,0 +1,577 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Based on u_format.h which is:
+ * Copyright 2009-2010 VMware, Inc.
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef VK_FORMAT_H +#define VK_FORMAT_H + +#include <assert.h> +#include <util/macros.h> + +#include <vulkan/vulkan.h> + +enum vk_format_layout +{ + /** + * Formats with vk_format_block::width == vk_format_block::height == 1 + * that can be described as an ordinary data structure. + */ + VK_FORMAT_LAYOUT_PLAIN = 0, + + /** + * Formats with sub-sampled channels. + * + * This is for formats like YVYU where there is less than one sample per + * pixel. + */ + VK_FORMAT_LAYOUT_SUBSAMPLED = 3, + + /** + * S3 Texture Compression formats. + */ + VK_FORMAT_LAYOUT_S3TC = 4, + + /** + * Red-Green Texture Compression formats. + */ + VK_FORMAT_LAYOUT_RGTC = 5, + + /** + * Ericsson Texture Compression + */ + VK_FORMAT_LAYOUT_ETC = 6, + + /** + * BC6/7 Texture Compression + */ + VK_FORMAT_LAYOUT_BPTC = 7, + + /** + * ASTC + */ + VK_FORMAT_LAYOUT_ASTC = 8, + + /** + * Everything else that doesn't fit in any of the above layouts. + */ + VK_FORMAT_LAYOUT_OTHER = 9 +}; + +struct vk_format_block +{ + /** Block width in pixels */ + unsigned width; + + /** Block height in pixels */ + unsigned height; + + /** Block size in bits */ + unsigned bits; +}; + +enum vk_format_type +{ + VK_FORMAT_TYPE_VOID = 0, + VK_FORMAT_TYPE_UNSIGNED = 1, + VK_FORMAT_TYPE_SIGNED = 2, + VK_FORMAT_TYPE_FIXED = 3, + VK_FORMAT_TYPE_FLOAT = 4 +}; + +enum vk_format_colorspace +{ + VK_FORMAT_COLORSPACE_RGB = 0, + VK_FORMAT_COLORSPACE_SRGB = 1, + VK_FORMAT_COLORSPACE_YUV = 2, + VK_FORMAT_COLORSPACE_ZS = 3 +}; + +struct vk_format_channel_description +{ + unsigned type : 5; + unsigned normalized : 1; + unsigned pure_integer : 1; + unsigned scaled : 1; + unsigned size : 8; + unsigned shift : 16; +}; + +struct vk_format_description +{ + VkFormat format; + const char *name; + const char *short_name; + + struct vk_format_block block; + enum vk_format_layout layout; + + unsigned nr_channels : 3; + unsigned is_array : 1; + unsigned is_bitmask : 1; + unsigned is_mixed : 1; + + struct vk_format_channel_description channel[4]; + + unsigned char swizzle[4]; + + enum vk_format_colorspace colorspace; +}; + +extern const struct vk_format_description vk_format_description_table[]; + +const struct vk_format_description * +vk_format_description(VkFormat format); + +/** + * Return total bits needed for the pixel format per block. + */ +static inline unsigned +vk_format_get_blocksizebits(VkFormat format) +{ + const struct vk_format_description *desc = vk_format_description(format); + + assert(desc); + if (!desc) { + return 0; + } + + return desc->block.bits; +} + +/** + * Return bytes per block (not pixel) for the given format. 
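+ *
+ * Worked example: VK_FORMAT_R8G8B8A8_UNORM is a 1x1 block of 32 bits, so
+ * this returns 4; VK_FORMAT_BC1_RGB_UNORM_BLOCK is a 4x4 block of 64 bits,
+ * so this returns 8 (one block's bytes cover 16 pixels).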
+ */ +static inline unsigned +vk_format_get_blocksize(VkFormat format) +{ + unsigned bits = vk_format_get_blocksizebits(format); + unsigned bytes = bits / 8; + + assert(bits % 8 == 0); + assert(bytes > 0); + if (bytes == 0) { + bytes = 1; + } + + return bytes; +} + +static inline unsigned +vk_format_get_blockwidth(VkFormat format) +{ + const struct vk_format_description *desc = vk_format_description(format); + + assert(desc); + if (!desc) { + return 1; + } + + return desc->block.width; +} + +static inline unsigned +vk_format_get_blockheight(VkFormat format) +{ + const struct vk_format_description *desc = vk_format_description(format); + + assert(desc); + if (!desc) { + return 1; + } + + return desc->block.height; +} + +static inline unsigned +vk_format_get_block_count_width(VkFormat format, unsigned width) +{ + unsigned blockwidth = vk_format_get_blockwidth(format); + return (width + blockwidth - 1) / blockwidth; +} + +static inline unsigned +vk_format_get_block_count_height(VkFormat format, unsigned height) +{ + unsigned blockheight = vk_format_get_blockheight(format); + return (height + blockheight - 1) / blockheight; +} + +static inline unsigned +vk_format_get_block_count(VkFormat format, unsigned width, unsigned height) +{ + return vk_format_get_block_count_width(format, width) * + vk_format_get_block_count_height(format, height); +} + +/** + * Return the index of the first non-void channel + * -1 if no non-void channels + */ +static inline int +vk_format_get_first_non_void_channel(VkFormat format) +{ + const struct vk_format_description *desc = vk_format_description(format); + int i; + + for (i = 0; i < 4; i++) + if (desc->channel[i].type != VK_FORMAT_TYPE_VOID) + break; + + if (i == 4) + return -1; + + return i; +} + +enum vk_swizzle +{ + VK_SWIZZLE_X, + VK_SWIZZLE_Y, + VK_SWIZZLE_Z, + VK_SWIZZLE_W, + VK_SWIZZLE_0, + VK_SWIZZLE_1, + VK_SWIZZLE_NONE, + VK_SWIZZLE_MAX, /**< Number of enums counter (must be last) */ +}; + +static inline VkImageAspectFlags +vk_format_aspects(VkFormat format) +{ + switch (format) { + case VK_FORMAT_UNDEFINED: + return 0; + + case VK_FORMAT_S8_UINT: + return VK_IMAGE_ASPECT_STENCIL_BIT; + + case VK_FORMAT_D16_UNORM_S8_UINT: + case VK_FORMAT_D24_UNORM_S8_UINT: + case VK_FORMAT_D32_SFLOAT_S8_UINT: + return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + + case VK_FORMAT_D16_UNORM: + case VK_FORMAT_X8_D24_UNORM_PACK32: + case VK_FORMAT_D32_SFLOAT: + return VK_IMAGE_ASPECT_DEPTH_BIT; + + default: + return VK_IMAGE_ASPECT_COLOR_BIT; + } +} + +static inline enum vk_swizzle +tu_swizzle_conv(VkComponentSwizzle component, + const unsigned char chan[4], + VkComponentSwizzle vk_swiz) +{ + int x; + + if (vk_swiz == VK_COMPONENT_SWIZZLE_IDENTITY) + vk_swiz = component; + switch (vk_swiz) { + case VK_COMPONENT_SWIZZLE_ZERO: + return VK_SWIZZLE_0; + case VK_COMPONENT_SWIZZLE_ONE: + return VK_SWIZZLE_1; + case VK_COMPONENT_SWIZZLE_R: + for (x = 0; x < 4; x++) + if (chan[x] == 0) + return x; + return VK_SWIZZLE_0; + case VK_COMPONENT_SWIZZLE_G: + for (x = 0; x < 4; x++) + if (chan[x] == 1) + return x; + return VK_SWIZZLE_0; + case VK_COMPONENT_SWIZZLE_B: + for (x = 0; x < 4; x++) + if (chan[x] == 2) + return x; + return VK_SWIZZLE_0; + case VK_COMPONENT_SWIZZLE_A: + for (x = 0; x < 4; x++) + if (chan[x] == 3) + return x; + return VK_SWIZZLE_1; + default: + unreachable("Illegal swizzle"); + } +} + +static inline void +vk_format_compose_swizzles(const VkComponentMapping *mapping, + const unsigned char swz[4], + enum vk_swizzle dst[4]) +{ + dst[0] = 
tu_swizzle_conv(VK_COMPONENT_SWIZZLE_R, swz, mapping->r); + dst[1] = tu_swizzle_conv(VK_COMPONENT_SWIZZLE_G, swz, mapping->g); + dst[2] = tu_swizzle_conv(VK_COMPONENT_SWIZZLE_B, swz, mapping->b); + dst[3] = tu_swizzle_conv(VK_COMPONENT_SWIZZLE_A, swz, mapping->a); +} + +static inline bool +vk_format_is_compressed(VkFormat format) +{ + const struct vk_format_description *desc = vk_format_description(format); + + assert(desc); + if (!desc) { + return false; + } + + switch (desc->layout) { + case VK_FORMAT_LAYOUT_S3TC: + case VK_FORMAT_LAYOUT_RGTC: + case VK_FORMAT_LAYOUT_ETC: + case VK_FORMAT_LAYOUT_BPTC: + case VK_FORMAT_LAYOUT_ASTC: + /* XXX add other formats in the future */ + return true; + default: + return false; + } +} + +static inline bool +vk_format_has_depth(const struct vk_format_description *desc) +{ + return desc->colorspace == VK_FORMAT_COLORSPACE_ZS && + desc->swizzle[0] != VK_SWIZZLE_NONE; +} + +static inline bool +vk_format_has_stencil(const struct vk_format_description *desc) +{ + return desc->colorspace == VK_FORMAT_COLORSPACE_ZS && + desc->swizzle[1] != VK_SWIZZLE_NONE; +} + +static inline bool +vk_format_is_depth_or_stencil(VkFormat format) +{ + const struct vk_format_description *desc = vk_format_description(format); + + assert(desc); + if (!desc) { + return false; + } + + return vk_format_has_depth(desc) || vk_format_has_stencil(desc); +} + +static inline bool +vk_format_is_depth(VkFormat format) +{ + const struct vk_format_description *desc = vk_format_description(format); + + assert(desc); + if (!desc) { + return false; + } + + return vk_format_has_depth(desc); +} + +static inline bool +vk_format_is_stencil(VkFormat format) +{ + const struct vk_format_description *desc = vk_format_description(format); + + assert(desc); + if (!desc) { + return false; + } + + return vk_format_has_stencil(desc); +} + +static inline bool +vk_format_is_color(VkFormat format) +{ + return !vk_format_is_depth_or_stencil(format); +} + +static inline bool +vk_format_has_alpha(VkFormat format) +{ + const struct vk_format_description *desc = vk_format_description(format); + + return (desc->colorspace == VK_FORMAT_COLORSPACE_RGB || + desc->colorspace == VK_FORMAT_COLORSPACE_SRGB) && + desc->swizzle[3] != VK_SWIZZLE_1; +} + +static inline VkFormat +vk_format_depth_only(VkFormat format) +{ + switch (format) { + case VK_FORMAT_D16_UNORM_S8_UINT: + return VK_FORMAT_D16_UNORM; + case VK_FORMAT_D24_UNORM_S8_UINT: + return VK_FORMAT_X8_D24_UNORM_PACK32; + case VK_FORMAT_D32_SFLOAT_S8_UINT: + return VK_FORMAT_D32_SFLOAT; + default: + return format; + } +} + +static inline bool +vk_format_is_int(VkFormat format) +{ + const struct vk_format_description *desc = vk_format_description(format); + int channel = vk_format_get_first_non_void_channel(format); + + return channel >= 0 && desc->channel[channel].pure_integer; +} + +static inline bool +vk_format_is_srgb(VkFormat format) +{ + const struct vk_format_description *desc = vk_format_description(format); + return desc->colorspace == VK_FORMAT_COLORSPACE_SRGB; +} + +static inline VkFormat +vk_format_no_srgb(VkFormat format) +{ + switch (format) { + case VK_FORMAT_R8_SRGB: + return VK_FORMAT_R8_UNORM; + case VK_FORMAT_R8G8_SRGB: + return VK_FORMAT_R8G8_UNORM; + case VK_FORMAT_R8G8B8_SRGB: + return VK_FORMAT_R8G8B8_UNORM; + case VK_FORMAT_B8G8R8_SRGB: + return VK_FORMAT_B8G8R8_UNORM; + case VK_FORMAT_R8G8B8A8_SRGB: + return VK_FORMAT_R8G8B8A8_UNORM; + case VK_FORMAT_B8G8R8A8_SRGB: + return VK_FORMAT_B8G8R8A8_UNORM; + case VK_FORMAT_A8B8G8R8_SRGB_PACK32: + return 
+   case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
+      return VK_FORMAT_BC1_RGB_UNORM_BLOCK;
+   case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
+      return VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
+   case VK_FORMAT_BC2_SRGB_BLOCK:
+      return VK_FORMAT_BC2_UNORM_BLOCK;
+   case VK_FORMAT_BC3_SRGB_BLOCK:
+      return VK_FORMAT_BC3_UNORM_BLOCK;
+   case VK_FORMAT_BC7_SRGB_BLOCK:
+      return VK_FORMAT_BC7_UNORM_BLOCK;
+   case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
+      return VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK;
+   case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
+      return VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK;
+   case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
+      return VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK;
+   default:
+      assert(!vk_format_is_srgb(format));
+      return format;
+   }
+}
+
+static inline VkFormat
+vk_format_stencil_only(VkFormat format)
+{
+   return VK_FORMAT_S8_UINT;
+}
+
+static inline unsigned
+vk_format_get_component_bits(VkFormat format,
+                             enum vk_format_colorspace colorspace,
+                             unsigned component)
+{
+   const struct vk_format_description *desc = vk_format_description(format);
+   enum vk_format_colorspace desc_colorspace;
+
+   assert(format);
+   if (!format) {
+      return 0;
+   }
+
+   assert(component < 4);
+
+   /* Treat RGB and SRGB as equivalent. */
+   if (colorspace == VK_FORMAT_COLORSPACE_SRGB) {
+      colorspace = VK_FORMAT_COLORSPACE_RGB;
+   }
+   if (desc->colorspace == VK_FORMAT_COLORSPACE_SRGB) {
+      desc_colorspace = VK_FORMAT_COLORSPACE_RGB;
+   } else {
+      desc_colorspace = desc->colorspace;
+   }
+
+   if (desc_colorspace != colorspace) {
+      return 0;
+   }
+
+   switch (desc->swizzle[component]) {
+   case VK_SWIZZLE_X:
+      return desc->channel[0].size;
+   case VK_SWIZZLE_Y:
+      return desc->channel[1].size;
+   case VK_SWIZZLE_Z:
+      return desc->channel[2].size;
+   case VK_SWIZZLE_W:
+      return desc->channel[3].size;
+   default:
+      return 0;
+   }
+}
+
+static inline VkFormat
+vk_to_non_srgb_format(VkFormat format)
+{
+   switch (format) {
+   case VK_FORMAT_R8_SRGB:
+      return VK_FORMAT_R8_UNORM;
+   case VK_FORMAT_R8G8_SRGB:
+      return VK_FORMAT_R8G8_UNORM;
+   case VK_FORMAT_R8G8B8_SRGB:
+      return VK_FORMAT_R8G8B8_UNORM;
+   case VK_FORMAT_B8G8R8_SRGB:
+      return VK_FORMAT_B8G8R8_UNORM;
+   case VK_FORMAT_R8G8B8A8_SRGB:
+      return VK_FORMAT_R8G8B8A8_UNORM;
+   case VK_FORMAT_B8G8R8A8_SRGB:
+      return VK_FORMAT_B8G8R8A8_UNORM;
+   case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+      return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+   default:
+      return format;
+   }
+}
+
+static inline unsigned
+vk_format_get_nr_components(VkFormat format)
+{
+   const struct vk_format_description *desc = vk_format_description(format);
+   return desc->nr_channels;
+}
+
+#endif /* VK_FORMAT_H */
diff --git a/lib/mesa/src/freedreno/vulkan/vk_format_layout.csv b/lib/mesa/src/freedreno/vulkan/vk_format_layout.csv
new file mode 100644
index 000000000..f9c2e6f7c
--- /dev/null
+++ b/lib/mesa/src/freedreno/vulkan/vk_format_layout.csv
@@ -0,0 +1,188 @@
+/* this is pretty much taken from the gallium one. */
+
+
+# Columns: format name, layout, block width, block height, four channel
+# descriptors, swizzle, colorspace.  A channel descriptor is a type letter
+# (x = void/padding, u = unsigned, s = signed, h = fixed, f = float), an
+# optional modifier (n = normalized, p = pure integer, s = scaled), and a
+# bit size.  A row may carry five extra fields describing a distinct
+# big-endian layout; when they are omitted, vk_format_parse.py reuses the
+# little-endian ones.  '#' comments are stripped by the parser.
+VK_FORMAT_UNDEFINED , plain, 1, 1, u8 , , , , x001, rgb
+VK_FORMAT_R4G4_UNORM_PACK8 , plain, 1, 1, un4 , un4 , , , xy01, rgb
+VK_FORMAT_R4G4B4A4_UNORM_PACK16 , plain, 1, 1, un4 , un4 , un4 , un4 , wzyx, rgb
+VK_FORMAT_B4G4R4A4_UNORM_PACK16 , plain, 1, 1, un4 , un4 , un4 , un4 , wxyz, rgb
+VK_FORMAT_R5G6B5_UNORM_PACK16 , plain, 1, 1, un5 , un6 , un5 , , zyx1, rgb
+VK_FORMAT_B5G6R5_UNORM_PACK16 , plain, 1, 1, un5 , un6 , un5 , , xyz1, rgb
+VK_FORMAT_R5G5B5A1_UNORM_PACK16 , plain, 1, 1, un1 , un5 , un5 , un5 , wzyx, rgb
+VK_FORMAT_B5G5R5A1_UNORM_PACK16 , plain, 1, 1, un1 , un5 , un5 , un5 , wxyz, rgb
+VK_FORMAT_A1R5G5B5_UNORM_PACK16 , plain, 1, 1, un5 , un5 , un5 , un1 , zyxw, rgb
+VK_FORMAT_R8_UNORM , plain, 1, 1, un8 , , , , x001, rgb
+VK_FORMAT_R8_SNORM , plain, 1, 1, sn8 , , , , x001, rgb
+VK_FORMAT_R8_USCALED , plain, 1, 1, us8 , , , , x001, rgb
+VK_FORMAT_R8_SSCALED , plain, 1, 1, ss8 , , , , x001, rgb
+VK_FORMAT_R8_UINT , plain, 1, 1, up8 , , , , x001, rgb
+VK_FORMAT_R8_SINT , plain, 1, 1, sp8 , , , , x001, rgb
+VK_FORMAT_R8_SRGB , plain, 1, 1, un8 , , , , x001, srgb
+VK_FORMAT_R8G8_UNORM , plain, 1, 1, un8 , un8 , , , xy01, rgb
+VK_FORMAT_R8G8_SNORM , plain, 1, 1, sn8 , sn8 , , , xy01, rgb
+VK_FORMAT_R8G8_USCALED , plain, 1, 1, us8 , us8 , , , xy01, rgb
+VK_FORMAT_R8G8_SSCALED , plain, 1, 1, ss8 , ss8 , , , xy01, rgb
+VK_FORMAT_R8G8_UINT , plain, 1, 1, up8 , up8 , , , xy01, rgb
+VK_FORMAT_R8G8_SINT , plain, 1, 1, sp8 , sp8 , , , xy01, rgb
+VK_FORMAT_R8G8_SRGB , plain, 1, 1, un8 , un8 , , , xy01, srgb
+VK_FORMAT_R8G8B8_UNORM , plain, 1, 1, un8 , un8 , un8 , , xyz1, rgb
+VK_FORMAT_R8G8B8_SNORM , plain, 1, 1, sn8 , sn8 , sn8 , , xyz1, rgb
+VK_FORMAT_R8G8B8_USCALED , plain, 1, 1, us8 , us8 , us8 , , xyz1, rgb
+VK_FORMAT_R8G8B8_SSCALED , plain, 1, 1, ss8 , ss8 , ss8 , , xyz1, rgb
+VK_FORMAT_R8G8B8_UINT , plain, 1, 1, up8 , up8 , up8 , , xyz1, rgb
+VK_FORMAT_R8G8B8_SINT , plain, 1, 1, sp8 , sp8 , sp8 , , xyz1, rgb
+VK_FORMAT_R8G8B8_SRGB , plain, 1, 1, un8 , un8 , un8 , , xyz1, srgb
+VK_FORMAT_B8G8R8_UNORM , plain, 1, 1, un8 , un8 , un8 , , zyx1, rgb
+VK_FORMAT_B8G8R8_SNORM , plain, 1, 1, sn8 , sn8 , sn8 , , zyx1, rgb
+VK_FORMAT_B8G8R8_USCALED , plain, 1, 1, us8 , us8 , us8 , , zyx1, rgb
+VK_FORMAT_B8G8R8_SSCALED , plain, 1, 1, ss8 , ss8 , ss8 , , zyx1, rgb
+VK_FORMAT_B8G8R8_UINT , plain, 1, 1, up8 , up8 , up8 , , zyx1, rgb
+VK_FORMAT_B8G8R8_SINT , plain, 1, 1, sp8 , sp8 , sp8 , , zyx1, rgb
+VK_FORMAT_B8G8R8_SRGB , plain, 1, 1, un8 , un8 , un8 , , zyx1, srgb
+VK_FORMAT_R8G8B8A8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , xyzw, rgb
+VK_FORMAT_R8G8B8A8_SNORM , plain, 1, 1, sn8 , sn8 , sn8 , sn8 , xyzw, rgb
+VK_FORMAT_R8G8B8A8_USCALED , plain, 1, 1, us8 , us8 , us8 , us8 , xyzw, rgb
+VK_FORMAT_R8G8B8A8_SSCALED , plain, 1, 1, ss8 , ss8 , ss8 , ss8 , xyzw, rgb
+VK_FORMAT_R8G8B8A8_UINT , plain, 1, 1, up8 , up8 , up8 , up8 , xyzw, rgb
+VK_FORMAT_R8G8B8A8_SINT , plain, 1, 1, sp8 , sp8 , sp8 , sp8 , xyzw, rgb
+VK_FORMAT_R8G8B8A8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , xyzw, srgb
+VK_FORMAT_B8G8R8A8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , zyxw, rgb
+VK_FORMAT_B8G8R8A8_SNORM , plain, 1, 1, sn8 , sn8 , sn8 , sn8 , zyxw, rgb
+VK_FORMAT_B8G8R8A8_USCALED , plain, 1, 1, us8 , us8 , us8 , us8 , zyxw, rgb
+VK_FORMAT_B8G8R8A8_SSCALED , plain, 1, 1, ss8 , ss8 , ss8 , ss8 , zyxw, rgb
+VK_FORMAT_B8G8R8A8_UINT , plain, 1, 1, up8 , up8 , up8 , up8 , zyxw, rgb
+VK_FORMAT_B8G8R8A8_SINT , plain, 1, 1, sp8 , sp8 , sp8 , sp8 , zyxw, rgb
+VK_FORMAT_B8G8R8A8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , zyxw, srgb
+VK_FORMAT_A8B8G8R8_UNORM_PACK32 , plain, 1, 1, un8 , un8 , un8 , un8 , xyzw, rgb
+VK_FORMAT_A8B8G8R8_SNORM_PACK32 , plain, 1, 1, sn8 , sn8 , sn8 , sn8 , xyzw, rgb
+VK_FORMAT_A8B8G8R8_USCALED_PACK32 , plain, 1, 1, us8 , us8 , us8 , us8 , xyzw, rgb
+VK_FORMAT_A8B8G8R8_SSCALED_PACK32 , plain, 1, 1, ss8 , ss8 , ss8 , ss8 , xyzw, rgb
+VK_FORMAT_A8B8G8R8_UINT_PACK32 , plain, 1, 1, up8 , up8 , up8 , up8 , xyzw, rgb
+VK_FORMAT_A8B8G8R8_SINT_PACK32 , plain, 1, 1, sp8 , sp8 , sp8 , sp8 , xyzw, rgb
+VK_FORMAT_A8B8G8R8_SRGB_PACK32 , plain, 1, 1, un8 , un8 , un8 , un8 , xyzw, srgb
+VK_FORMAT_A2R10G10B10_UNORM_PACK32 , plain, 1, 1, un10, un10, un10, un2 , zyxw, rgb
+VK_FORMAT_A2R10G10B10_SNORM_PACK32 , plain, 1, 1, sn10, sn10, sn10, sn2 , zyxw, rgb
+VK_FORMAT_A2R10G10B10_USCALED_PACK32 , plain, 1, 1, us10, us10, us10, us2 , zyxw, rgb
+VK_FORMAT_A2R10G10B10_SSCALED_PACK32 , plain, 1, 1, ss10, ss10, ss10, ss2 , zyxw, rgb
+VK_FORMAT_A2R10G10B10_UINT_PACK32 , plain, 1, 1, up10, up10, up10, up2 , zyxw, rgb
+VK_FORMAT_A2R10G10B10_SINT_PACK32 , plain, 1, 1, sp10, sp10, sp10, sp2 , zyxw, rgb
+VK_FORMAT_A2B10G10R10_UNORM_PACK32 , plain, 1, 1, un10, un10, un10, un2 , xyzw, rgb
+VK_FORMAT_A2B10G10R10_SNORM_PACK32 , plain, 1, 1, sn10, sn10, sn10, sn2 , xyzw, rgb
+VK_FORMAT_A2B10G10R10_USCALED_PACK32 , plain, 1, 1, us10, us10, us10, us2 , xyzw, rgb
+VK_FORMAT_A2B10G10R10_SSCALED_PACK32 , plain, 1, 1, ss10, ss10, ss10, ss2 , xyzw, rgb
+VK_FORMAT_A2B10G10R10_UINT_PACK32 , plain, 1, 1, up10, up10, up10, up2 , xyzw, rgb
+VK_FORMAT_A2B10G10R10_SINT_PACK32 , plain, 1, 1, sp10, sp10, sp10, sp2 , xyzw, rgb
+VK_FORMAT_R16_UNORM , plain, 1, 1, un16, , , , x001, rgb
+VK_FORMAT_R16_SNORM , plain, 1, 1, sn16, , , , x001, rgb
+VK_FORMAT_R16_USCALED , plain, 1, 1, us16, , , , x001, rgb
+VK_FORMAT_R16_SSCALED , plain, 1, 1, ss16, , , , x001, rgb
+VK_FORMAT_R16_UINT , plain, 1, 1, up16, , , , x001, rgb
+VK_FORMAT_R16_SINT , plain, 1, 1, sp16, , , , x001, rgb
+VK_FORMAT_R16_SFLOAT , plain, 1, 1, f16 , , , , x001, rgb
+VK_FORMAT_R16G16_UNORM , plain, 1, 1, un16, un16, , , xy01, rgb
+VK_FORMAT_R16G16_SNORM , plain, 1, 1, sn16, sn16, , , xy01, rgb
+VK_FORMAT_R16G16_USCALED , plain, 1, 1, us16, us16, , , xy01, rgb
+VK_FORMAT_R16G16_SSCALED , plain, 1, 1, ss16, ss16, , , xy01, rgb
+VK_FORMAT_R16G16_UINT , plain, 1, 1, up16, up16, , , xy01, rgb
+VK_FORMAT_R16G16_SINT , plain, 1, 1, sp16, sp16, , , xy01, rgb
+VK_FORMAT_R16G16_SFLOAT , plain, 1, 1, f16 , f16 , , , xy01, rgb
+VK_FORMAT_R16G16B16_UNORM , plain, 1, 1, un16, un16, un16, , xyz1, rgb
+VK_FORMAT_R16G16B16_SNORM , plain, 1, 1, sn16, sn16, sn16, , xyz1, rgb
+VK_FORMAT_R16G16B16_USCALED , plain, 1, 1, us16, us16, us16, , xyz1, rgb
+VK_FORMAT_R16G16B16_SSCALED , plain, 1, 1, ss16, ss16, ss16, , xyz1, rgb
+VK_FORMAT_R16G16B16_UINT , plain, 1, 1, up16, up16, up16, , xyz1, rgb
+VK_FORMAT_R16G16B16_SINT , plain, 1, 1, sp16, sp16, sp16, , xyz1, rgb
+VK_FORMAT_R16G16B16_SFLOAT , plain, 1, 1, f16 , f16 , f16 , , xyz1, rgb
+VK_FORMAT_R16G16B16A16_UNORM , plain, 1, 1, un16, un16, un16, un16, xyzw, rgb
+VK_FORMAT_R16G16B16A16_SNORM , plain, 1, 1, sn16, sn16, sn16, sn16, xyzw, rgb
+VK_FORMAT_R16G16B16A16_USCALED , plain, 1, 1, us16, us16, us16, us16, xyzw, rgb
+VK_FORMAT_R16G16B16A16_SSCALED , plain, 1, 1, ss16, ss16, ss16, ss16, xyzw, rgb
+VK_FORMAT_R16G16B16A16_UINT , plain, 1, 1, up16, up16, up16, up16, xyzw, rgb
+VK_FORMAT_R16G16B16A16_SINT , plain, 1, 1, sp16, sp16, sp16, sp16, xyzw, rgb
+VK_FORMAT_R16G16B16A16_SFLOAT , plain, 1, 1, f16 , f16 , f16 , f16 , xyzw, rgb
+VK_FORMAT_R32_UINT , plain, 1, 1, up32, , , , x001, rgb
+VK_FORMAT_R32_SINT , plain, 1, 1, sp32, , , , x001, rgb
+VK_FORMAT_R32_SFLOAT , plain, 1, 1, f32 , , , , x001, rgb
+VK_FORMAT_R32G32_UINT , plain, 1, 1, up32, up32, , , xy01, rgb
+VK_FORMAT_R32G32_SINT , plain, 1, 1, sp32, sp32, , , xy01, rgb
+VK_FORMAT_R32G32_SFLOAT , plain, 1, 1, f32 , f32 , , , xy01, rgb
+VK_FORMAT_R32G32B32_UINT , plain, 1, 1, up32, up32, up32, , xyz1, rgb
+VK_FORMAT_R32G32B32_SINT , plain, 1, 1, sp32, sp32, sp32, , xyz1, rgb
+VK_FORMAT_R32G32B32_SFLOAT , plain, 1, 1, f32 , f32 , f32 , , xyz1, rgb
+VK_FORMAT_R32G32B32A32_UINT , plain, 1, 1, up32, up32, up32, up32, xyzw, rgb
+VK_FORMAT_R32G32B32A32_SINT , plain, 1, 1, sp32, sp32, sp32, sp32, xyzw, rgb
+VK_FORMAT_R32G32B32A32_SFLOAT , plain, 1, 1, f32 , f32 , f32 , f32 , xyzw, rgb
+VK_FORMAT_R64_UINT , plain, 1, 1, up64, , , , x001, rgb
+VK_FORMAT_R64_SINT , plain, 1, 1, sp64, , , , x001, rgb
+VK_FORMAT_R64_SFLOAT , plain, 1, 1, f64 , , , , x001, rgb
+VK_FORMAT_R64G64_UINT , plain, 1, 1, up64, up64, , , xy01, rgb
+VK_FORMAT_R64G64_SINT , plain, 1, 1, sp64, sp64, , , xy01, rgb
+VK_FORMAT_R64G64_SFLOAT , plain, 1, 1, f64 , f64 , , , xy01, rgb
+VK_FORMAT_R64G64B64_UINT , plain, 1, 1, up64, up64, up64, , xyz1, rgb
+VK_FORMAT_R64G64B64_SINT , plain, 1, 1, sp64, sp64, sp64, , xyz1, rgb
+VK_FORMAT_R64G64B64_SFLOAT , plain, 1, 1, f64 , f64 , f64 , , xyz1, rgb
+VK_FORMAT_R64G64B64A64_UINT , plain, 1, 1, up64, up64, up64, up64, xyzw, rgb
+VK_FORMAT_R64G64B64A64_SINT , plain, 1, 1, sp64, sp64, sp64, sp64, xyzw, rgb
+VK_FORMAT_R64G64B64A64_SFLOAT , plain, 1, 1, f64 , f64 , f64 , f64 , xyzw, rgb
+VK_FORMAT_B10G11R11_UFLOAT_PACK32 , other, 1, 1, x32 , , , , xyz1, rgb
+VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 , other, 1, 1, x32 , , , , xyz1, rgb
+VK_FORMAT_D16_UNORM , plain, 1, 1, un16, , , , x___, zs
+VK_FORMAT_X8_D24_UNORM_PACK32 , plain, 1, 1, un24, x8 , , , x___, zs
+VK_FORMAT_D32_SFLOAT , plain, 1, 1, f32 , , , , x___, zs
+VK_FORMAT_S8_UINT , plain, 1, 1, up8 , , , , _x__, zs
+VK_FORMAT_D16_UNORM_S8_UINT , plain, 1, 1, un16, up8 , , , xy__, zs
+VK_FORMAT_D24_UNORM_S8_UINT , plain, 1, 1, un24, up8 , , , xy__, zs
+VK_FORMAT_D32_SFLOAT_S8_UINT , plain, 1, 1, f32 , up8 , , , xy__, zs
+VK_FORMAT_BC1_RGB_UNORM_BLOCK , s3tc, 4, 4, x64 , , , , xyz1, rgb
+VK_FORMAT_BC1_RGB_SRGB_BLOCK , s3tc, 4, 4, x64 , , , , xyz1, srgb
+VK_FORMAT_BC1_RGBA_UNORM_BLOCK , s3tc, 4, 4, x64 , , , , xyzw, rgb
+VK_FORMAT_BC1_RGBA_SRGB_BLOCK , s3tc, 4, 4, x64 , , , , xyzw, srgb
+VK_FORMAT_BC2_UNORM_BLOCK , s3tc, 4, 4, x128, , , , xyzw, rgb
+VK_FORMAT_BC2_SRGB_BLOCK , s3tc, 4, 4, x128, , , , xyzw, srgb
+VK_FORMAT_BC3_UNORM_BLOCK , s3tc, 4, 4, x128, , , , xyzw, rgb
+VK_FORMAT_BC3_SRGB_BLOCK , s3tc, 4, 4, x128, , , , xyzw, srgb
+VK_FORMAT_BC4_UNORM_BLOCK , rgtc, 4, 4, x64, , , , x001, rgb
+VK_FORMAT_BC4_SNORM_BLOCK , rgtc, 4, 4, x64, , , , x001, rgb
+VK_FORMAT_BC5_UNORM_BLOCK , rgtc, 4, 4, x128, , , , xy01, rgb
+VK_FORMAT_BC5_SNORM_BLOCK , rgtc, 4, 4, x128, , , , xy01, rgb
+VK_FORMAT_BC6H_UFLOAT_BLOCK , bptc, 4, 4, x128, , , , xyz1, rgb
+VK_FORMAT_BC6H_SFLOAT_BLOCK , bptc, 4, 4, x128, , , , xyz1, rgb
+VK_FORMAT_BC7_UNORM_BLOCK , bptc, 4, 4, x128, , , , xyzw, rgb
+VK_FORMAT_BC7_SRGB_BLOCK , bptc, 4, 4, x128, , , , xyzw, srgb
+VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK , etc, 4, 4, x64, , , , xyz1, rgb
+VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK , etc, 4, 4, x64, , , , xyz1, srgb
+VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK , etc, 4, 4, x64, , , , xyzw, rgb
+VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK , etc, 4, 4, x64, , , , xyzw, srgb
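+# Note on the compressed rows (s3tc/rgtc/bptc above, etc below): each block is
+# described as a single opaque x64/x128 bitfield, and the block width/height
+# columns give the block's texel dimensions, so block_size() reports bits per
+# block rather than bits per texel.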
+VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK , etc, 4, 4, x128, , , , xyzw, rgb
+VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK , etc, 4, 4, x128, , , , xyzw, srgb
+VK_FORMAT_EAC_R11_UNORM_BLOCK , etc, 4, 4, x64, , , , x001, rgb
+VK_FORMAT_EAC_R11_SNORM_BLOCK , etc, 4, 4, x64, , , , x001, rgb
+VK_FORMAT_EAC_R11G11_UNORM_BLOCK , etc, 4, 4, x128, , , , xy01, rgb
+VK_FORMAT_EAC_R11G11_SNORM_BLOCK , etc, 4, 4, x128, , , , xy01, rgb
+# The ASTC entries below are placeholders: with no layout fields they have
+# fewer than 10 columns and are skipped by vk_format_parse.py, so no format
+# descriptions are generated for them.
+VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
+VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
+VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
+VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
+VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
+VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
+VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
+VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
+VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
+VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
+VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
+VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
+VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
+VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
+VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
+VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
+VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
+VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
+VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
+VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
+VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
+VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
+VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
+VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
+VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
+VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
+VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
+VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
diff --git a/lib/mesa/src/freedreno/vulkan/vk_format_parse.py b/lib/mesa/src/freedreno/vulkan/vk_format_parse.py
new file mode 100644
index 000000000..8f3823c80
--- /dev/null
+++ b/lib/mesa/src/freedreno/vulkan/vk_format_parse.py
@@ -0,0 +1,388 @@
+
+'''
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+'''
+
+
+VOID, UNSIGNED, SIGNED, FIXED, FLOAT = range(5)
+
+SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_0, SWIZZLE_1, SWIZZLE_NONE, = range(7)
+
+PLAIN = 'plain'
+SCALED = 'scaled'
+
+RGB = 'rgb'
+SRGB = 'srgb'
+YUV = 'yuv'
+ZS = 'zs'
+
+
+def is_pot(x):
+    return (x & (x - 1)) == 0
+
+
+VERY_LARGE = 99999999999999999999999
+
+
+class Channel:
+    '''Describe one channel of a pixel format.'''
+
+    def __init__(self, type, norm, pure, scaled, size, name = ''):
+        self.type = type
+        self.norm = norm
+        self.pure = pure
+        self.size = size
+        self.scaled = scaled
+        self.sign = type in (SIGNED, FIXED, FLOAT)
+        self.name = name
+
+    def __str__(self):
+        s = str(self.type)
+        if self.norm:
+            s += 'n'
+        if self.pure:
+            s += 'p'
+        if self.scaled:
+            s += 's'
+        s += str(self.size)
+        return s
+
+    def __eq__(self, other):
+        return (other is not None and
+                self.type == other.type and
+                self.norm == other.norm and
+                self.pure == other.pure and
+                self.size == other.size and
+                self.scaled == other.scaled)
+
+    def max(self):
+        '''Maximum representable number.'''
+        if self.type == FLOAT:
+            return VERY_LARGE
+        if self.type == FIXED:
+            # Integer division so the shift count stays an int on Python 3.
+            return (1 << (self.size // 2)) - 1
+        if self.norm:
+            return 1
+        if self.type == UNSIGNED:
+            return (1 << self.size) - 1
+        if self.type == SIGNED:
+            return (1 << (self.size - 1)) - 1
+        assert False
+
+    def min(self):
+        '''Minimum representable number.'''
+        if self.type == FLOAT:
+            return -VERY_LARGE
+        if self.type == FIXED:
+            return -(1 << (self.size // 2))
+        if self.type == UNSIGNED:
+            return 0
+        if self.norm:
+            return -1
+        if self.type == SIGNED:
+            return -(1 << (self.size - 1))
+        assert False
+
+
+class Format:
+    '''Describe a pixel format.'''
+
+    def __init__(self, name, layout, block_width, block_height, le_channels, le_swizzles, be_channels, be_swizzles, colorspace):
+        self.name = name
+        self.layout = layout
+        self.block_width = block_width
+        self.block_height = block_height
+        self.le_channels = le_channels
+        self.le_swizzles = le_swizzles
+        self.be_channels = be_channels
+        self.be_swizzles = be_swizzles
+        self.colorspace = colorspace
+
+    def __str__(self):
+        return self.name
+
+    def short_name(self):
+        '''Make up a short name for a format, suitable to be used as suffix in
+        function names.'''
+
+        name = self.name
+        if name.startswith('VK_FORMAT_'):
+            name = name[len('VK_FORMAT_'):]
+        name = name.lower()
+        return name
+
+    def block_size(self):
+        size = 0
+        for channel in self.le_channels:
+            size += channel.size
+        return size
+
+    def nr_channels(self):
+        nr_channels = 0
+        for channel in self.le_channels:
+            if channel.size:
+                nr_channels += 1
+        return nr_channels
+
+    def array_element(self):
+        if self.layout != PLAIN:
+            return None
+        ref_channel = self.le_channels[0]
+        if ref_channel.type == VOID:
+            ref_channel = self.le_channels[1]
+        for channel in self.le_channels:
+            if channel.size and (channel.size != ref_channel.size or channel.size % 8):
+                return None
+            if channel.type != VOID:
+                if channel.type != ref_channel.type:
+                    return None
+                if channel.norm != ref_channel.norm:
+                    return None
+                if channel.pure != ref_channel.pure:
+                    return None
+                if channel.scaled != ref_channel.scaled:
+                    return None
+        return ref_channel
+
+    def is_array(self):
+        return self.array_element() is not None
+
+    def is_mixed(self):
+        if self.layout != PLAIN:
+            return False
+        ref_channel = self.le_channels[0]
+        if ref_channel.type == VOID:
+            ref_channel = self.le_channels[1]
+        for channel in self.le_channels[1:]:
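+            # A channel that disagrees with the reference channel on any of
+            # type/norm/pure/scaled makes the format "mixed"; VOID padding
+            # channels are skipped.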
+            if channel.type != VOID:
+                if channel.type != ref_channel.type:
+                    return True
+                if channel.norm != ref_channel.norm:
+                    return True
+                if channel.pure != ref_channel.pure:
+                    return True
+                if channel.scaled != ref_channel.scaled:
+                    return True
+        return False
+
+    def is_pot(self):
+        return is_pot(self.block_size())
+
+    def is_int(self):
+        if self.layout != PLAIN:
+            return False
+        for channel in self.le_channels:
+            if channel.type not in (VOID, UNSIGNED, SIGNED):
+                return False
+        return True
+
+    def is_float(self):
+        if self.layout != PLAIN:
+            return False
+        for channel in self.le_channels:
+            if channel.type not in (VOID, FLOAT):
+                return False
+        return True
+
+    def is_bitmask(self):
+        if self.layout != PLAIN:
+            return False
+        if self.block_size() not in (8, 16, 32):
+            return False
+        for channel in self.le_channels:
+            if channel.type not in (VOID, UNSIGNED, SIGNED):
+                return False
+        return True
+
+    def is_pure_color(self):
+        if self.layout != PLAIN or self.colorspace == ZS:
+            return False
+        pures = [channel.pure
+                 for channel in self.le_channels
+                 if channel.type != VOID]
+        for x in pures:
+            assert x == pures[0]
+        return pures[0]
+
+    def channel_type(self):
+        types = [channel.type
+                 for channel in self.le_channels
+                 if channel.type != VOID]
+        for x in types:
+            assert x == types[0]
+        return types[0]
+
+    def is_pure_signed(self):
+        return self.is_pure_color() and self.channel_type() == SIGNED
+
+    def is_pure_unsigned(self):
+        return self.is_pure_color() and self.channel_type() == UNSIGNED
+
+    def has_channel(self, id):
+        return self.le_swizzles[id] != SWIZZLE_NONE
+
+    def has_depth(self):
+        return self.colorspace == ZS and self.has_channel(0)
+
+    def has_stencil(self):
+        return self.colorspace == ZS and self.has_channel(1)
+
+    def stride(self):
+        # Bytes per element; integer division keeps this an int on Python 3.
+        return self.block_size() // 8
+
+
+_type_parse_map = {
+    '': VOID,
+    'x': VOID,
+    'u': UNSIGNED,
+    's': SIGNED,
+    'h': FIXED,
+    'f': FLOAT,
+}
+
+_swizzle_parse_map = {
+    'x': SWIZZLE_X,
+    'y': SWIZZLE_Y,
+    'z': SWIZZLE_Z,
+    'w': SWIZZLE_W,
+    '0': SWIZZLE_0,
+    '1': SWIZZLE_1,
+    '_': SWIZZLE_NONE,
+}
+
+def _parse_channels(fields, layout, colorspace, swizzles):
+    if layout == PLAIN:
+        names = ['']*4
+        if colorspace in (RGB, SRGB):
+            for i in range(4):
+                swizzle = swizzles[i]
+                if swizzle < 4:
+                    names[swizzle] += 'rgba'[i]
+        elif colorspace == ZS:
+            for i in range(4):
+                swizzle = swizzles[i]
+                if swizzle < 4:
+                    names[swizzle] += 'zs'[i]
+        else:
+            assert False
+        for i in range(4):
+            if names[i] == '':
+                names[i] = 'x'
+    else:
+        names = ['x', 'y', 'z', 'w']
+
+    channels = []
+    for i in range(0, 4):
+        field = fields[i]
+        if field:
+            type = _type_parse_map[field[0]]
+            if field[1] == 'n':
+                norm = True
+                pure = False
+                scaled = False
+                size = int(field[2:])
+            elif field[1] == 'p':
+                pure = True
+                norm = False
+                scaled = False
+                size = int(field[2:])
+            elif field[1] == 's':
+                pure = False
+                norm = False
+                scaled = True
+                size = int(field[2:])
+            else:
+                norm = False
+                pure = False
+                scaled = False
+                size = int(field[1:])
+        else:
+            type = VOID
+            norm = False
+            pure = False
+            scaled = False
+            size = 0
+        channel = Channel(type, norm, pure, scaled, size, names[i])
+        channels.append(channel)
+
+    return channels
+
+def parse(filename):
+    '''Parse the format description in CSV format in terms of the
+    Channel and Format classes above.'''
+
+    stream = open(filename)
+    formats = []
+    for line in stream:
+        try:
+            comment = line.index('#')
+        except ValueError:
+            pass
+        else:
+            line = line[:comment]
+        line = line.strip()
+        if not line:
+            continue
+
+        fields = [field.strip() for field in line.split(',')]
+        if len(fields) < 10:
+            continue
+        if len(fields) == 10:
+            fields += fields[4:9]
+        assert len(fields) == 15
+
+        name = fields[0]
+        layout = fields[1]
+        block_width, block_height = map(int, fields[2:4])
+        colorspace = fields[9]
+
+        le_swizzles = [_swizzle_parse_map[swizzle] for swizzle in fields[8]]
+        le_channels = _parse_channels(fields[4:8], layout, colorspace, le_swizzles)
+
+        be_swizzles = [_swizzle_parse_map[swizzle] for swizzle in fields[14]]
+        be_channels = _parse_channels(fields[10:14], layout, colorspace, be_swizzles)
+
+        le_shift = 0
+        for channel in le_channels:
+            channel.shift = le_shift
+            le_shift += channel.size
+
+        be_shift = 0
+        for channel in be_channels[3::-1]:
+            channel.shift = be_shift
+            be_shift += channel.size
+
+        assert le_shift == be_shift
+        for i in range(4):
+            assert (le_swizzles[i] != SWIZZLE_NONE) == (be_swizzles[i] != SWIZZLE_NONE)
+
+        format = Format(name, layout, block_width, block_height, le_channels, le_swizzles, be_channels, be_swizzles, colorspace)
+        formats.append(format)
+    return formats
+
diff --git a/lib/mesa/src/freedreno/vulkan/vk_format_table.py b/lib/mesa/src/freedreno/vulkan/vk_format_table.py
new file mode 100644
index 000000000..604aac8fa
--- /dev/null
+++ b/lib/mesa/src/freedreno/vulkan/vk_format_table.py
@@ -0,0 +1,173 @@
+from __future__ import print_function
+
+CopyRight = '''
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+'''
+
+
+import sys
+
+from vk_format_parse import *
+
+def layout_map(layout):
+    return 'VK_FORMAT_LAYOUT_' + str(layout).upper()
+
+
+def colorspace_map(colorspace):
+    return 'VK_FORMAT_COLORSPACE_' + str(colorspace).upper()
+
+
+colorspace_channels_map = {
+    'rgb': ['r', 'g', 'b', 'a'],
+    'srgb': ['sr', 'sg', 'sb', 'a'],
+    'zs': ['z', 's'],
+    'yuv': ['y', 'u', 'v'],
+}
+
+
+type_map = {
+    VOID: "VK_FORMAT_TYPE_VOID",
+    UNSIGNED: "VK_FORMAT_TYPE_UNSIGNED",
+    SIGNED: "VK_FORMAT_TYPE_SIGNED",
+    FIXED: "VK_FORMAT_TYPE_FIXED",
+    FLOAT: "VK_FORMAT_TYPE_FLOAT",
+}
+
+
+def bool_map(value):
+    if value:
+        return "true"
+    else:
+        return "false"
+
+
+swizzle_map = {
+    SWIZZLE_X: "VK_SWIZZLE_X",
+    SWIZZLE_Y: "VK_SWIZZLE_Y",
+    SWIZZLE_Z: "VK_SWIZZLE_Z",
+    SWIZZLE_W: "VK_SWIZZLE_W",
+    SWIZZLE_0: "VK_SWIZZLE_0",
+    SWIZZLE_1: "VK_SWIZZLE_1",
+    SWIZZLE_NONE: "VK_SWIZZLE_NONE",
+}
+
+def print_channels(format, func):
+    if format.nr_channels() <= 1:
+        func(format.le_channels, format.le_swizzles)
+    else:
+        print('#ifdef PIPE_ARCH_BIG_ENDIAN')
+        func(format.be_channels, format.be_swizzles)
+        print('#else')
+        func(format.le_channels, format.le_swizzles)
+        print('#endif')
+
+def write_format_table(formats):
+    print('/* This file is autogenerated by vk_format_table.py from vk_format_layout.csv. Do not edit directly. */')
+    print()
+    # This will print the copyright message on the top of this file
+    print(CopyRight.strip())
+    print()
+    print('#include "stdbool.h"')
+    print('#include "vk_format.h"')
+    print()
+
+    def do_channel_array(channels, swizzles):
+        print("   {")
+        for i in range(4):
+            channel = channels[i]
+            if i < 3:
+                sep = ","
+            else:
+                sep = ""
+            if channel.size:
+                print("      {%s, %s, %s, %s, %u, %u}%s\t/* %s = %s */" % (type_map[channel.type], bool_map(channel.norm), bool_map(channel.pure), bool_map(channel.scaled), channel.size, channel.shift, sep, "xyzw"[i], channel.name))
+            else:
+                print("      {0, 0, 0, 0, 0}%s" % (sep,))
+        print("   },")
+
+    def do_swizzle_array(channels, swizzles):
+        print("   {")
+        for i in range(4):
+            swizzle = swizzles[i]
+            if i < 3:
+                sep = ","
+            else:
+                sep = ""
+            try:
+                comment = colorspace_channels_map[format.colorspace][i]
+            except (KeyError, IndexError):
+                comment = 'ignored'
+            print("      %s%s\t/* %s */" % (swizzle_map[swizzle], sep, comment))
+        print("   },")
+
+    for format in formats:
+        print('static const struct vk_format_description')
+        print('vk_format_%s_description = {' % (format.short_name(),))
+        print("   %s," % (format.name,))
+        print("   \"%s\"," % (format.name,))
+        print("   \"%s\"," % (format.short_name(),))
+        print("   {%u, %u, %u},\t/* block */" % (format.block_width, format.block_height, format.block_size()))
+        print("   %s," % (layout_map(format.layout),))
+        print("   %u,\t/* nr_channels */" % (format.nr_channels(),))
+        print("   %s,\t/* is_array */" % (bool_map(format.is_array()),))
+        print("   %s,\t/* is_bitmask */" % (bool_map(format.is_bitmask()),))
+        print("   %s,\t/* is_mixed */" % (bool_map(format.is_mixed()),))
+        print_channels(format, do_channel_array)
+        print_channels(format, do_swizzle_array)
+        print("   %s," % (colorspace_map(format.colorspace),))
+        print("};")
+        print()
+
+    print("const struct vk_format_description *")
+    print("vk_format_description(VkFormat format)")
+    print("{")
+    print("   if (format > VK_FORMAT_END_RANGE) {")
+    print("      return NULL;")
+    print("   }")
+    print()
+    print("   switch (format) {")
+    for format in formats:
+        print("   case %s:" % format.name)
+        print("      return &vk_format_%s_description;" % (format.short_name(),))
+    print("   default:")
+    print("      return NULL;")
+    print("   }")
+    print("}")
+    print()
+
+
+def main():
+
+    formats = []
+    for arg in sys.argv[1:]:
+        formats.extend(parse(arg))
+    write_format_table(formats)
+
+
+if __name__ == '__main__':
+    main()
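+
+# Typical invocation: each command-line argument is parsed as a layout CSV and
+# the generated C table is written to stdout, so the caller is expected to
+# redirect or capture it, e.g.:
+#
+#   python vk_format_table.py vk_format_layout.csv > vk_format_table.c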