diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-09-02 05:47:02 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-09-02 05:47:02 +0000 |
commit | 0dbbf1e0708df85a357d70e2708c0a11aeb5480e (patch) | |
tree | 6656ff8eb8b15a2fc1c02888973caf618388cfd0 /lib/mesa/src/freedreno | |
parent | 5f66494d31f735486b8222ecfa0a0c9046e92543 (diff) |
Merge Mesa 22.1.7
Diffstat (limited to 'lib/mesa/src/freedreno')
24 files changed, 1384 insertions, 4098 deletions
diff --git a/lib/mesa/src/freedreno/afuc/Makefile b/lib/mesa/src/freedreno/afuc/Makefile deleted file mode 100644 index 12e6f3aeb..000000000 --- a/lib/mesa/src/freedreno/afuc/Makefile +++ /dev/null @@ -1,368 +0,0 @@ -# CMAKE generated file: DO NOT EDIT! -# Generated by "Unix Makefiles" Generator, CMake Version 3.17 - -# Default target executed when no arguments are given to make. -default_target: all - -.PHONY : default_target - -# Allow only one "make -f Makefile2" at a time, but pass parallelism. -.NOTPARALLEL: - - -#============================================================================= -# Special targets provided by cmake. - -# Disable implicit rules so canonical targets will work. -.SUFFIXES: - - -# Disable VCS-based implicit rules. -% : %,v - - -# Disable VCS-based implicit rules. -% : RCS/% - - -# Disable VCS-based implicit rules. -% : RCS/%,v - - -# Disable VCS-based implicit rules. -% : SCCS/s.% - - -# Disable VCS-based implicit rules. -% : s.% - - -.SUFFIXES: .hpux_make_needs_suffix_list - - -# Command-line flag to silence nested $(MAKE). -$(VERBOSE)MAKESILENT = -s - -# Suppress display of executed commands. -$(VERBOSE).SILENT: - - -# A target that is always out of date. -cmake_force: - -.PHONY : cmake_force - -#============================================================================= -# Set environment variables for the build. - -# The shell in which to execute make rules. -SHELL = /bin/sh - -# The CMake executable. -CMAKE_COMMAND = /usr/bin/cmake - -# The command to remove a file. -RM = /usr/bin/cmake -E rm -f - -# Escaping for special characters. -EQUALS = = - -# The top-level source directory on which CMake was run. -CMAKE_SOURCE_DIR = /home/robclark/src/envytools - -# The top-level build directory on which CMake was run. -CMAKE_BINARY_DIR = /home/robclark/src/envytools - -#============================================================================= -# Targets provided globally by CMake. 
- -# Special rule for the target install/strip -install/strip: preinstall - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..." - /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake -.PHONY : install/strip - -# Special rule for the target install/strip -install/strip/fast: preinstall/fast - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..." - /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake -.PHONY : install/strip/fast - -# Special rule for the target install/local -install/local: preinstall - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..." - /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake -.PHONY : install/local - -# Special rule for the target install/local -install/local/fast: preinstall/fast - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..." - /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake -.PHONY : install/local/fast - -# Special rule for the target edit_cache -edit_cache: - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake cache editor..." - /usr/bin/ccmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) -.PHONY : edit_cache - -# Special rule for the target edit_cache -edit_cache/fast: edit_cache - -.PHONY : edit_cache/fast - -# Special rule for the target test -test: - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running tests..." - /usr/bin/ctest --force-new-ctest-process $(ARGS) -.PHONY : test - -# Special rule for the target test -test/fast: test - -.PHONY : test/fast - -# Special rule for the target install -install: preinstall - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..." 
- /usr/bin/cmake -P cmake_install.cmake -.PHONY : install - -# Special rule for the target install -install/fast: preinstall/fast - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..." - /usr/bin/cmake -P cmake_install.cmake -.PHONY : install/fast - -# Special rule for the target list_install_components -list_install_components: - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Available install components are: \"Unspecified\"" -.PHONY : list_install_components - -# Special rule for the target list_install_components -list_install_components/fast: list_install_components - -.PHONY : list_install_components/fast - -# Special rule for the target rebuild_cache -rebuild_cache: - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." - /usr/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) -.PHONY : rebuild_cache - -# Special rule for the target rebuild_cache -rebuild_cache/fast: rebuild_cache - -.PHONY : rebuild_cache/fast - -# The main all target -all: cmake_check_build_system - cd /home/robclark/src/envytools && $(CMAKE_COMMAND) -E cmake_progress_start /home/robclark/src/envytools/CMakeFiles /home/robclark/src/envytools/afuc/CMakeFiles/progress.marks - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/all - $(CMAKE_COMMAND) -E cmake_progress_start /home/robclark/src/envytools/CMakeFiles 0 -.PHONY : all - -# The main clean target -clean: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/clean -.PHONY : clean - -# The main clean target -clean/fast: clean - -.PHONY : clean/fast - -# Prepare targets for installation. -preinstall: all - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/preinstall -.PHONY : preinstall - -# Prepare targets for installation. 
-preinstall/fast: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/preinstall -.PHONY : preinstall/fast - -# clear depends -depend: - cd /home/robclark/src/envytools && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 -.PHONY : depend - -# Convenience name for target. -afuc/CMakeFiles/asm.dir/rule: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/CMakeFiles/asm.dir/rule -.PHONY : afuc/CMakeFiles/asm.dir/rule - -# Convenience name for target. -asm: afuc/CMakeFiles/asm.dir/rule - -.PHONY : asm - -# fast build rule for target. -asm/fast: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/build -.PHONY : asm/fast - -# Convenience name for target. -afuc/CMakeFiles/disasm.dir/rule: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/CMakeFiles/disasm.dir/rule -.PHONY : afuc/CMakeFiles/disasm.dir/rule - -# Convenience name for target. -disasm: afuc/CMakeFiles/disasm.dir/rule - -.PHONY : disasm - -# fast build rule for target. 
-disasm/fast: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/build -.PHONY : disasm/fast - -asm.o: asm.c.o - -.PHONY : asm.o - -# target to build an object file -asm.c.o: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/asm.c.o -.PHONY : asm.c.o - -asm.i: asm.c.i - -.PHONY : asm.i - -# target to preprocess a source file -asm.c.i: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/asm.c.i -.PHONY : asm.c.i - -asm.s: asm.c.s - -.PHONY : asm.s - -# target to generate assembly for a file -asm.c.s: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/asm.c.s -.PHONY : asm.c.s - -disasm.o: disasm.c.o - -.PHONY : disasm.o - -# target to build an object file -disasm.c.o: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/disasm.c.o -.PHONY : disasm.c.o - -disasm.i: disasm.c.i - -.PHONY : disasm.i - -# target to preprocess a source file -disasm.c.i: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/disasm.c.i -.PHONY : disasm.c.i - -disasm.s: disasm.c.s - -.PHONY : disasm.s - -# target to generate assembly for a file -disasm.c.s: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/disasm.c.s -.PHONY : disasm.c.s - -lexer.o: lexer.c.o - -.PHONY : lexer.o - -# target to build an object file -lexer.c.o: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/lexer.c.o -.PHONY : lexer.c.o - -lexer.i: lexer.c.i - -.PHONY : lexer.i - -# target to preprocess a source file -lexer.c.i: - cd 
/home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/lexer.c.i -.PHONY : lexer.c.i - -lexer.s: lexer.c.s - -.PHONY : lexer.s - -# target to generate assembly for a file -lexer.c.s: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/lexer.c.s -.PHONY : lexer.c.s - -parser.o: parser.c.o - -.PHONY : parser.o - -# target to build an object file -parser.c.o: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/parser.c.o -.PHONY : parser.c.o - -parser.i: parser.c.i - -.PHONY : parser.i - -# target to preprocess a source file -parser.c.i: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/parser.c.i -.PHONY : parser.c.i - -parser.s: parser.c.s - -.PHONY : parser.s - -# target to generate assembly for a file -parser.c.s: - cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/parser.c.s -.PHONY : parser.c.s - -# Help Target -help: - @echo "The following are some of the valid targets for this Makefile:" - @echo "... all (the default if no target is provided)" - @echo "... clean" - @echo "... depend" - @echo "... edit_cache" - @echo "... install" - @echo "... install/local" - @echo "... install/strip" - @echo "... list_install_components" - @echo "... rebuild_cache" - @echo "... test" - @echo "... asm" - @echo "... disasm" - @echo "... asm.o" - @echo "... asm.i" - @echo "... asm.s" - @echo "... disasm.o" - @echo "... disasm.i" - @echo "... disasm.s" - @echo "... lexer.o" - @echo "... lexer.i" - @echo "... lexer.s" - @echo "... parser.o" - @echo "... parser.i" - @echo "... parser.s" -.PHONY : help - - - -#============================================================================= -# Special targets to cleanup operation of make. 
- -# Special rule to run CMake to check the build system integrity. -# No rule that depends on this can have commands that come from listfiles -# because they might be regenerated. -cmake_check_build_system: - cd /home/robclark/src/envytools && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 -.PHONY : cmake_check_build_system - diff --git a/lib/mesa/src/freedreno/computerator/a6xx.c b/lib/mesa/src/freedreno/computerator/a6xx.c index 67104a6db..a0ce6f986 100644 --- a/lib/mesa/src/freedreno/computerator/a6xx.c +++ b/lib/mesa/src/freedreno/computerator/a6xx.c @@ -158,6 +158,12 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1); OUT_RING(ring, 0x41); + if (a6xx_backend->info->a6xx.has_lpac) { + OUT_PKT4(ring, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, 1); + OUT_RING(ring, A6XX_HLSQ_CS_UNKNOWN_B9D0_SHARED_SIZE(1) | + A6XX_HLSQ_CS_UNKNOWN_B9D0_UNK6); + } + uint32_t local_invocation_id, work_group_id; local_invocation_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID); @@ -171,6 +177,16 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) OUT_RING(ring, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) | A6XX_HLSQ_CS_CNTL_1_THREADSIZE(thrsz)); + if (a6xx_backend->info->a6xx.has_lpac) { + OUT_PKT4(ring, REG_A6XX_SP_CS_CNTL_0, 2); + OUT_RING(ring, A6XX_SP_CS_CNTL_0_WGIDCONSTID(work_group_id) | + A6XX_SP_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) | + A6XX_SP_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) | + A6XX_SP_CS_CNTL_0_LOCALIDREGID(local_invocation_id)); + OUT_RING(ring, A6XX_SP_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) | + A6XX_SP_CS_CNTL_1_THREADSIZE(thrsz)); + } + OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START, 2); OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */ @@ -180,12 +196,14 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START, 2); OUT_RELOC(ring, v->bo, 0, 0, 
0); + uint32_t shader_preload_size = + MIN2(v->instrlen, a6xx_backend->info->a6xx.instr_cache_size); OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3); OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) | - CP_LOAD_STATE6_0_NUM_UNIT(v->instrlen)); + CP_LOAD_STATE6_0_NUM_UNIT(shader_preload_size)); OUT_RELOC(ring, v->bo, 0, 0, 0); if (v->pvtmem_size > 0) { @@ -296,11 +314,11 @@ cs_ibo_emit(struct fd_ringbuffer *ring, struct fd_submit *submit, unsigned width = sz & MASK(15); unsigned height = sz >> 15; - OUT_RING(state, A6XX_IBO_0_FMT(FMT6_32_UINT) | A6XX_IBO_0_TILE_MODE(0)); - OUT_RING(state, A6XX_IBO_1_WIDTH(width) | A6XX_IBO_1_HEIGHT(height)); - OUT_RING(state, A6XX_IBO_2_PITCH(0) | A6XX_IBO_2_UNK4 | A6XX_IBO_2_UNK31 | - A6XX_IBO_2_TYPE(A6XX_TEX_1D)); - OUT_RING(state, A6XX_IBO_3_ARRAY_PITCH(0)); + OUT_RING(state, A6XX_TEX_CONST_0_FMT(FMT6_32_UINT) | A6XX_TEX_CONST_0_TILE_MODE(0)); + OUT_RING(state, A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height)); + OUT_RING(state, A6XX_TEX_CONST_2_PITCH(0) | A6XX_TEX_CONST_2_BUFFER | + A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER)); + OUT_RING(state, A6XX_TEX_CONST_3_ARRAY_PITCH(0)); OUT_RELOC(state, kernel->bufs[i], 0, 0, 0); OUT_RING(state, 0x00000000); OUT_RING(state, 0x00000000); @@ -519,7 +537,8 @@ a6xx_init(struct fd_device *dev, const struct fd_dev_id *dev_id) .read_perfcntrs = a6xx_read_perfcntrs, }; - a6xx_backend->compiler = ir3_compiler_create(dev, dev_id, false); + a6xx_backend->compiler = ir3_compiler_create(dev, dev_id, + &(struct ir3_compiler_options){}); a6xx_backend->dev = dev; a6xx_backend->info = fd_dev_info(dev_id); diff --git a/lib/mesa/src/freedreno/computerator/ir3_asm.c b/lib/mesa/src/freedreno/computerator/ir3_asm.c index b9c295adf..e0f3c9bc5 100644 --- a/lib/mesa/src/freedreno/computerator/ir3_asm.c +++ b/lib/mesa/src/freedreno/computerator/ir3_asm.c @@ -35,8 +35,6 @@ 
ir3_asm_assemble(struct ir3_compiler *c, FILE *in) errx(-1, "assembler failed"); struct ir3_shader_variant *v = shader->variants; - v->mergedregs = true; - kernel->v = v; kernel->bin = v->bin; @@ -55,6 +53,9 @@ ir3_asm_assemble(struct ir3_compiler *c, FILE *in) memcpy(fd_bo_map(v->bo), kernel->bin, sz); + /* Always include shaders in kernel crash dumps. */ + fd_bo_mark_for_dump(v->bo); + return kernel; } diff --git a/lib/mesa/src/freedreno/computerator/main.c b/lib/mesa/src/freedreno/computerator/main.c index 0468380be..6c4f14534 100644 --- a/lib/mesa/src/freedreno/computerator/main.c +++ b/lib/mesa/src/freedreno/computerator/main.c @@ -236,11 +236,7 @@ main(int argc, char **argv) } } - int fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER); - if (fd < 0) - err(1, "could not open drm device"); - - struct fd_device *dev = fd_device_new(fd); + struct fd_device *dev = fd_device_open(); struct fd_pipe *pipe = fd_pipe_new(dev, FD_PIPE_3D); const struct fd_dev_id *dev_id = fd_pipe_dev_id(pipe); diff --git a/lib/mesa/src/freedreno/drm/msm_bo.c b/lib/mesa/src/freedreno/drm/msm_bo.c deleted file mode 100644 index da2609903..000000000 --- a/lib/mesa/src/freedreno/drm/msm_bo.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <robclark@freedesktop.org> - */ - -#include "msm_priv.h" - -static int bo_allocate(struct msm_bo *msm_bo) -{ - struct fd_bo *bo = &msm_bo->base; - if (!msm_bo->offset) { - struct drm_msm_gem_info req = { - .handle = bo->handle, - .info = MSM_INFO_GET_OFFSET, - }; - int ret; - - /* if the buffer is already backed by pages then this - * doesn't actually do anything (other than giving us - * the offset) - */ - ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO, - &req, sizeof(req)); - if (ret) { - ERROR_MSG("alloc failed: %s", strerror(errno)); - return ret; - } - - msm_bo->offset = req.value; - } - - return 0; -} - -static int msm_bo_offset(struct fd_bo *bo, uint64_t *offset) -{ - struct msm_bo *msm_bo = to_msm_bo(bo); - int ret = bo_allocate(msm_bo); - if (ret) - return ret; - *offset = msm_bo->offset; - return 0; -} - -static int msm_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op) -{ - struct drm_msm_gem_cpu_prep req = { - .handle = bo->handle, - .op = op, - }; - - get_abs_timeout(&req.timeout, 5000000000); - - return drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_PREP, &req, sizeof(req)); -} - -static void msm_bo_cpu_fini(struct fd_bo *bo) -{ - struct drm_msm_gem_cpu_fini req = { - .handle = bo->handle, - }; - - drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_FINI, &req, sizeof(req)); -} - -static int msm_bo_madvise(struct fd_bo *bo, int willneed) -{ - struct drm_msm_gem_madvise req = { - .handle = bo->handle, - .madv = willneed ? 
MSM_MADV_WILLNEED : MSM_MADV_DONTNEED, - }; - int ret; - - /* older kernels do not support this: */ - if (bo->dev->version < FD_VERSION_MADVISE) - return willneed; - - ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_MADVISE, &req, sizeof(req)); - if (ret) - return ret; - - return req.retained; -} - -static uint64_t msm_bo_iova(struct fd_bo *bo) -{ - struct drm_msm_gem_info req = { - .handle = bo->handle, - .info = MSM_INFO_GET_IOVA, - }; - int ret; - - ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req)); - debug_assert(ret == 0); - - return req.value; -} - -static void msm_bo_set_name(struct fd_bo *bo, const char *fmt, va_list ap) -{ - struct drm_msm_gem_info req = { - .handle = bo->handle, - .info = MSM_INFO_SET_NAME, - }; - char buf[32]; - int sz; - - if (bo->dev->version < FD_VERSION_SOFTPIN) - return; - - sz = vsnprintf(buf, sizeof(buf), fmt, ap); - - req.value = VOID2U64(buf); - req.len = MIN2(sz, sizeof(buf)); - - drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req)); -} - -static void msm_bo_destroy(struct fd_bo *bo) -{ - struct msm_bo *msm_bo = to_msm_bo(bo); - free(msm_bo); -} - -static const struct fd_bo_funcs funcs = { - .offset = msm_bo_offset, - .cpu_prep = msm_bo_cpu_prep, - .cpu_fini = msm_bo_cpu_fini, - .madvise = msm_bo_madvise, - .iova = msm_bo_iova, - .set_name = msm_bo_set_name, - .destroy = msm_bo_destroy, -}; - -/* allocate a buffer handle: */ -int msm_bo_new_handle(struct fd_device *dev, - uint32_t size, uint32_t flags, uint32_t *handle) -{ - struct drm_msm_gem_new req = { - .size = size, - .flags = MSM_BO_WC, // TODO figure out proper flags.. 
- }; - int ret; - - if (flags & DRM_FREEDRENO_GEM_SCANOUT) - req.flags |= MSM_BO_SCANOUT; - - if (flags & DRM_FREEDRENO_GEM_GPUREADONLY) - req.flags |= MSM_BO_GPU_READONLY; - - ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, - &req, sizeof(req)); - if (ret) - return ret; - - *handle = req.handle; - - return 0; -} - -/* allocate a new buffer object */ -struct fd_bo * msm_bo_from_handle(struct fd_device *dev, - uint32_t size, uint32_t handle) -{ - struct msm_bo *msm_bo; - struct fd_bo *bo; - - msm_bo = calloc(1, sizeof(*msm_bo)); - if (!msm_bo) - return NULL; - - bo = &msm_bo->base; - bo->funcs = &funcs; - - return bo; -} diff --git a/lib/mesa/src/freedreno/drm/msm_device.c b/lib/mesa/src/freedreno/drm/msm_device.c deleted file mode 100644 index d391ef013..000000000 --- a/lib/mesa/src/freedreno/drm/msm_device.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <robclark@freedesktop.org> - */ - -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> - -#include "msm_priv.h" - -static void msm_device_destroy(struct fd_device *dev) -{ - struct msm_device *msm_dev = to_msm_device(dev); - free(msm_dev); -} - -static const struct fd_device_funcs funcs = { - .bo_new_handle = msm_bo_new_handle, - .bo_from_handle = msm_bo_from_handle, - .pipe_new = msm_pipe_new, - .destroy = msm_device_destroy, -}; - -struct fd_device * msm_device_new(int fd) -{ - struct msm_device *msm_dev; - struct fd_device *dev; - - msm_dev = calloc(1, sizeof(*msm_dev)); - if (!msm_dev) - return NULL; - - dev = &msm_dev->base; - dev->funcs = &funcs; - - dev->bo_size = sizeof(struct msm_bo); - - return dev; -} diff --git a/lib/mesa/src/freedreno/drm/msm_pipe.c b/lib/mesa/src/freedreno/drm/msm_pipe.c deleted file mode 100644 index 7d5b9fcd7..000000000 --- a/lib/mesa/src/freedreno/drm/msm_pipe.c +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <robclark@freedesktop.org> - */ - -#include "util/slab.h" - -#include "msm_priv.h" - -static int query_param(struct fd_pipe *pipe, uint32_t param, - uint64_t *value) -{ - struct msm_pipe *msm_pipe = to_msm_pipe(pipe); - struct drm_msm_param req = { - .pipe = msm_pipe->pipe, - .param = param, - }; - int ret; - - ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_GET_PARAM, - &req, sizeof(req)); - if (ret) - return ret; - - *value = req.value; - - return 0; -} - -static int msm_pipe_get_param(struct fd_pipe *pipe, - enum fd_param_id param, uint64_t *value) -{ - struct msm_pipe *msm_pipe = to_msm_pipe(pipe); - switch(param) { - case FD_DEVICE_ID: // XXX probably get rid of this.. 
- case FD_GPU_ID: - *value = msm_pipe->gpu_id; - return 0; - case FD_GMEM_SIZE: - *value = msm_pipe->gmem; - return 0; - case FD_CHIP_ID: - *value = msm_pipe->chip_id; - return 0; - case FD_MAX_FREQ: - return query_param(pipe, MSM_PARAM_MAX_FREQ, value); - case FD_TIMESTAMP: - return query_param(pipe, MSM_PARAM_TIMESTAMP, value); - case FD_NR_RINGS: - return query_param(pipe, MSM_PARAM_NR_RINGS, value); - default: - ERROR_MSG("invalid param id: %d", param); - return -1; - } -} - -static int msm_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp, - uint64_t timeout) -{ - struct fd_device *dev = pipe->dev; - struct drm_msm_wait_fence req = { - .fence = timestamp, - .queueid = to_msm_pipe(pipe)->queue_id, - }; - int ret; - - get_abs_timeout(&req.timeout, timeout); - - ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req)); - if (ret) { - ERROR_MSG("wait-fence failed! %d (%s)", ret, strerror(errno)); - return ret; - } - - return 0; -} - -static int open_submitqueue(struct fd_pipe *pipe, uint32_t prio) -{ - struct drm_msm_submitqueue req = { - .flags = 0, - .prio = prio, - }; - uint64_t nr_rings = 1; - int ret; - - if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES) { - to_msm_pipe(pipe)->queue_id = 0; - return 0; - } - - msm_pipe_get_param(pipe, FD_NR_RINGS, &nr_rings); - - req.prio = MIN2(req.prio, MAX2(nr_rings, 1) - 1); - - ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_NEW, - &req, sizeof(req)); - if (ret) { - ERROR_MSG("could not create submitqueue! 
%d (%s)", ret, strerror(errno)); - return ret; - } - - to_msm_pipe(pipe)->queue_id = req.id; - return 0; -} - -static void close_submitqueue(struct fd_pipe *pipe, uint32_t queue_id) -{ - if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES) - return; - - drmCommandWrite(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE, - &queue_id, sizeof(queue_id)); -} - -static void msm_pipe_destroy(struct fd_pipe *pipe) -{ - struct msm_pipe *msm_pipe = to_msm_pipe(pipe); - close_submitqueue(pipe, msm_pipe->queue_id); - free(msm_pipe); -} - -static const struct fd_pipe_funcs sp_funcs = { - .ringbuffer_new_object = msm_ringbuffer_sp_new_object, - .submit_new = msm_submit_sp_new, - .get_param = msm_pipe_get_param, - .wait = msm_pipe_wait, - .destroy = msm_pipe_destroy, -}; - -static const struct fd_pipe_funcs legacy_funcs = { - .ringbuffer_new_object = msm_ringbuffer_new_object, - .submit_new = msm_submit_new, - .get_param = msm_pipe_get_param, - .wait = msm_pipe_wait, - .destroy = msm_pipe_destroy, -}; - -static uint64_t get_param(struct fd_pipe *pipe, uint32_t param) -{ - uint64_t value; - int ret = query_param(pipe, param, &value); - if (ret) { - ERROR_MSG("get-param failed! 
%d (%s)", ret, strerror(errno)); - return 0; - } - return value; -} - -struct fd_pipe * msm_pipe_new(struct fd_device *dev, - enum fd_pipe_id id, uint32_t prio) -{ - static const uint32_t pipe_id[] = { - [FD_PIPE_3D] = MSM_PIPE_3D0, - [FD_PIPE_2D] = MSM_PIPE_2D0, - }; - struct msm_pipe *msm_pipe = NULL; - struct fd_pipe *pipe = NULL; - - msm_pipe = calloc(1, sizeof(*msm_pipe)); - if (!msm_pipe) { - ERROR_MSG("allocation failed"); - goto fail; - } - - pipe = &msm_pipe->base; - - if (fd_device_version(dev) >= FD_VERSION_SOFTPIN) { - pipe->funcs = &sp_funcs; - } else { - pipe->funcs = &legacy_funcs; - } - - /* initialize before get_param(): */ - pipe->dev = dev; - msm_pipe->pipe = pipe_id[id]; - - /* these params should be supported since the first version of drm/msm: */ - msm_pipe->gpu_id = get_param(pipe, MSM_PARAM_GPU_ID); - msm_pipe->gmem = get_param(pipe, MSM_PARAM_GMEM_SIZE); - msm_pipe->chip_id = get_param(pipe, MSM_PARAM_CHIP_ID); - - if (! msm_pipe->gpu_id) - goto fail; - - INFO_MSG("Pipe Info:"); - INFO_MSG(" GPU-id: %d", msm_pipe->gpu_id); - INFO_MSG(" Chip-id: 0x%08x", msm_pipe->chip_id); - INFO_MSG(" GMEM size: 0x%08x", msm_pipe->gmem); - - if (open_submitqueue(pipe, prio)) - goto fail; - - return pipe; -fail: - if (pipe) - fd_pipe_del(pipe); - return NULL; -} diff --git a/lib/mesa/src/freedreno/drm/msm_priv.h b/lib/mesa/src/freedreno/drm/msm_priv.h deleted file mode 100644 index 9cb60bc1d..000000000 --- a/lib/mesa/src/freedreno/drm/msm_priv.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do 
so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <robclark@freedesktop.org> - */ - -#ifndef MSM_PRIV_H_ -#define MSM_PRIV_H_ - -#include "freedreno_priv.h" - -#ifndef __user -# define __user -#endif - -#include "msm_drm.h" - -struct msm_device { - struct fd_device base; - struct fd_bo_cache ring_cache; -}; -FD_DEFINE_CAST(fd_device, msm_device); - -struct fd_device * msm_device_new(int fd); - -struct msm_pipe { - struct fd_pipe base; - uint32_t pipe; - uint32_t gpu_id; - uint32_t gmem; - uint32_t chip_id; - uint32_t queue_id; -}; -FD_DEFINE_CAST(fd_pipe, msm_pipe); - -struct fd_pipe * msm_pipe_new(struct fd_device *dev, - enum fd_pipe_id id, uint32_t prio); - -struct fd_ringbuffer * msm_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size); -struct fd_ringbuffer * msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size); - -struct fd_submit * msm_submit_new(struct fd_pipe *pipe); -struct fd_submit * msm_submit_sp_new(struct fd_pipe *pipe); - -struct msm_bo { - struct fd_bo base; - uint64_t offset; - /* to avoid excess hashtable lookups, cache the ring this bo was - * last emitted on (since that will probably also be the next ring - * it is emitted on) - */ - unsigned current_submit_seqno; - uint32_t idx; -}; -FD_DEFINE_CAST(fd_bo, msm_bo); - -int msm_bo_new_handle(struct 
fd_device *dev, - uint32_t size, uint32_t flags, uint32_t *handle); -struct fd_bo * msm_bo_from_handle(struct fd_device *dev, - uint32_t size, uint32_t handle); - -static inline void -msm_dump_submit(struct drm_msm_gem_submit *req) -{ - for (unsigned i = 0; i < req->nr_bos; i++) { - struct drm_msm_gem_submit_bo *bos = U642VOID(req->bos); - struct drm_msm_gem_submit_bo *bo = &bos[i]; - ERROR_MSG(" bos[%d]: handle=%u, flags=%x", i, bo->handle, bo->flags); - } - for (unsigned i = 0; i < req->nr_cmds; i++) { - struct drm_msm_gem_submit_cmd *cmds = U642VOID(req->cmds); - struct drm_msm_gem_submit_cmd *cmd = &cmds[i]; - struct drm_msm_gem_submit_reloc *relocs = U642VOID(cmd->relocs); - ERROR_MSG(" cmd[%d]: type=%u, submit_idx=%u, submit_offset=%u, size=%u", - i, cmd->type, cmd->submit_idx, cmd->submit_offset, cmd->size); - for (unsigned j = 0; j < cmd->nr_relocs; j++) { - struct drm_msm_gem_submit_reloc *r = &relocs[j]; - ERROR_MSG(" reloc[%d]: submit_offset=%u, or=%08x, shift=%d, reloc_idx=%u" - ", reloc_offset=%"PRIu64, j, r->submit_offset, r->or, r->shift, - r->reloc_idx, r->reloc_offset); - } - } -} - -static inline void get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns) -{ - struct timespec t; - uint32_t s = ns / 1000000000; - clock_gettime(CLOCK_MONOTONIC, &t); - tv->tv_sec = t.tv_sec + s; - tv->tv_nsec = t.tv_nsec + ns - (s * 1000000000); -} - -/* - * Stupid/simple growable array implementation: - */ - -static inline void * -grow(void *ptr, uint16_t nr, uint16_t *max, uint16_t sz) -{ - if ((nr + 1) > *max) { - if ((*max * 2) < (nr + 1)) - *max = nr + 5; - else - *max = *max * 2; - ptr = realloc(ptr, *max * sz); - } - return ptr; -} - -#define DECLARE_ARRAY(type, name) \ - unsigned short nr_ ## name, max_ ## name; \ - type * name; - -#define APPEND(x, name) ({ \ - (x)->name = grow((x)->name, (x)->nr_ ## name, &(x)->max_ ## name, sizeof((x)->name[0])); \ - (x)->nr_ ## name ++; \ -}) - -#endif /* MSM_PRIV_H_ */ diff --git 
a/lib/mesa/src/freedreno/drm/msm_ringbuffer.c b/lib/mesa/src/freedreno/drm/msm_ringbuffer.c deleted file mode 100644 index 369f26f98..000000000 --- a/lib/mesa/src/freedreno/drm/msm_ringbuffer.c +++ /dev/null @@ -1,722 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark <robclark@freedesktop.org> - */ - -#include <assert.h> -#include <inttypes.h> - -#include "util/hash_table.h" -#include "util/set.h" -#include "util/slab.h" - -#include "drm/freedreno_ringbuffer.h" -#include "msm_priv.h" - -/* The legacy implementation of submit/ringbuffer, which still does the - * traditional reloc and cmd tracking - */ - - -#define INIT_SIZE 0x1000 - -static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER; - - -struct msm_submit { - struct fd_submit base; - - DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos); - DECLARE_ARRAY(struct fd_bo *, bos); - - unsigned seqno; - - /* maps fd_bo to idx in bos table: */ - struct hash_table *bo_table; - - struct slab_mempool ring_pool; - - /* hash-set of associated rings: */ - struct set *ring_set; - - struct fd_ringbuffer *primary; - - /* Allow for sub-allocation of stateobj ring buffers (ie. sharing - * the same underlying bo).. - * - * We also rely on previous stateobj having been fully constructed - * so we can reclaim extra space at it's end. - */ - struct fd_ringbuffer *suballoc_ring; -}; -FD_DEFINE_CAST(fd_submit, msm_submit); - -/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers - * and sizes. Ie. a finalized buffer can have no more commands appended to - * it. 
- */ -struct msm_cmd { - struct fd_bo *ring_bo; - unsigned size; - DECLARE_ARRAY(struct drm_msm_gem_submit_reloc, relocs); -}; - -static struct msm_cmd * -cmd_new(struct fd_bo *ring_bo) -{ - struct msm_cmd *cmd = malloc(sizeof(*cmd)); - cmd->ring_bo = fd_bo_ref(ring_bo); - cmd->size = 0; - cmd->nr_relocs = cmd->max_relocs = 0; - cmd->relocs = NULL; - return cmd; -} - -static void -cmd_free(struct msm_cmd *cmd) -{ - fd_bo_del(cmd->ring_bo); - free(cmd->relocs); - free(cmd); -} - -/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to - * later copy into the submit when the stateobj rb is later referenced by - * a regular rb: - */ -struct msm_reloc_bo { - struct fd_bo *bo; - unsigned flags; -}; - -struct msm_ringbuffer { - struct fd_ringbuffer base; - - /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ - unsigned offset; - - union { - /* for _FD_RINGBUFFER_OBJECT case: */ - struct { - struct fd_pipe *pipe; - DECLARE_ARRAY(struct msm_reloc_bo, reloc_bos); - struct set *ring_set; - }; - /* for other cases: */ - struct { - struct fd_submit *submit; - DECLARE_ARRAY(struct msm_cmd *, cmds); - }; - } u; - - struct msm_cmd *cmd; /* current cmd */ - struct fd_bo *ring_bo; -}; -FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer); - -static void finalize_current_cmd(struct fd_ringbuffer *ring); -static struct fd_ringbuffer * msm_ringbuffer_init( - struct msm_ringbuffer *msm_ring, - uint32_t size, enum fd_ringbuffer_flags flags); - -/* add (if needed) bo to submit and return index: */ -static uint32_t -append_bo(struct msm_submit *submit, struct fd_bo *bo, uint32_t flags) -{ - struct msm_bo *msm_bo = to_msm_bo(bo); - uint32_t idx; - pthread_mutex_lock(&idx_lock); - if (likely(msm_bo->current_submit_seqno == submit->seqno)) { - idx = msm_bo->idx; - } else { - uint32_t hash = _mesa_hash_pointer(bo); - struct hash_entry *entry; - - entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); - if (entry) { - /* found */ - idx = 
(uint32_t)(uintptr_t)entry->data; - } else { - idx = APPEND(submit, submit_bos); - idx = APPEND(submit, bos); - - submit->submit_bos[idx].flags = 0; - submit->submit_bos[idx].handle = bo->handle; - submit->submit_bos[idx].presumed = 0; - - submit->bos[idx] = fd_bo_ref(bo); - - _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, - (void *)(uintptr_t)idx); - } - msm_bo->current_submit_seqno = submit->seqno; - msm_bo->idx = idx; - } - pthread_mutex_unlock(&idx_lock); - if (flags & FD_RELOC_READ) - submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ; - if (flags & FD_RELOC_WRITE) - submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE; - return idx; -} - -static void -append_ring(struct set *set, struct fd_ringbuffer *ring) -{ - uint32_t hash = _mesa_hash_pointer(ring); - - if (!_mesa_set_search_pre_hashed(set, hash, ring)) { - fd_ringbuffer_ref(ring); - _mesa_set_add_pre_hashed(set, hash, ring); - } -} - -static void -msm_submit_suballoc_ring_bo(struct fd_submit *submit, - struct msm_ringbuffer *msm_ring, uint32_t size) -{ - struct msm_submit *msm_submit = to_msm_submit(submit); - unsigned suballoc_offset = 0; - struct fd_bo *suballoc_bo = NULL; - - if (msm_submit->suballoc_ring) { - struct msm_ringbuffer *suballoc_ring = - to_msm_ringbuffer(msm_submit->suballoc_ring); - - suballoc_bo = suballoc_ring->ring_bo; - suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) + - suballoc_ring->offset; - - suballoc_offset = align(suballoc_offset, 0x10); - - if ((size + suballoc_offset) > suballoc_bo->size) { - suballoc_bo = NULL; - } - } - - if (!suballoc_bo) { - // TODO possibly larger size for streaming bo? 
- msm_ring->ring_bo = fd_bo_new_ring( - submit->pipe->dev, 0x8000, 0); - msm_ring->offset = 0; - } else { - msm_ring->ring_bo = fd_bo_ref(suballoc_bo); - msm_ring->offset = suballoc_offset; - } - - struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; - - msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); - - if (old_suballoc_ring) - fd_ringbuffer_del(old_suballoc_ring); -} - -static struct fd_ringbuffer * -msm_submit_new_ringbuffer(struct fd_submit *submit, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - struct msm_submit *msm_submit = to_msm_submit(submit); - struct msm_ringbuffer *msm_ring; - - msm_ring = slab_alloc_st(&msm_submit->ring_pool); - - msm_ring->u.submit = submit; - - /* NOTE: needs to be before _suballoc_ring_bo() since it could - * increment the refcnt of the current ring - */ - msm_ring->base.refcnt = 1; - - if (flags & FD_RINGBUFFER_STREAMING) { - msm_submit_suballoc_ring_bo(submit, msm_ring, size); - } else { - if (flags & FD_RINGBUFFER_GROWABLE) - size = INIT_SIZE; - - msm_ring->offset = 0; - msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, 0); - } - - if (!msm_ringbuffer_init(msm_ring, size, flags)) - return NULL; - - if (flags & FD_RINGBUFFER_PRIMARY) { - debug_assert(!msm_submit->primary); - msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base); - } - - return &msm_ring->base; -} - -static struct drm_msm_gem_submit_reloc * -handle_stateobj_relocs(struct msm_submit *submit, struct msm_ringbuffer *ring) -{ - struct msm_cmd *cmd = ring->cmd; - struct drm_msm_gem_submit_reloc *relocs; - - relocs = malloc(cmd->nr_relocs * sizeof(*relocs)); - - for (unsigned i = 0; i < cmd->nr_relocs; i++) { - unsigned idx = cmd->relocs[i].reloc_idx; - struct fd_bo *bo = ring->u.reloc_bos[idx].bo; - unsigned flags = 0; - - if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_READ) - flags |= FD_RELOC_READ; - if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_WRITE) - flags |= FD_RELOC_WRITE; - - relocs[i] = cmd->relocs[i]; 
- relocs[i].reloc_idx = append_bo(submit, bo, flags); - } - - return relocs; -} - -static int -msm_submit_flush(struct fd_submit *submit, int in_fence_fd, - int *out_fence_fd, uint32_t *out_fence) -{ - struct msm_submit *msm_submit = to_msm_submit(submit); - struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe); - struct drm_msm_gem_submit req = { - .flags = msm_pipe->pipe, - .queueid = msm_pipe->queue_id, - }; - int ret; - - debug_assert(msm_submit->primary); - - finalize_current_cmd(msm_submit->primary); - append_ring(msm_submit->ring_set, msm_submit->primary); - - unsigned nr_cmds = 0; - unsigned nr_objs = 0; - - set_foreach(msm_submit->ring_set, entry) { - struct fd_ringbuffer *ring = (void *)entry->key; - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - nr_cmds += 1; - nr_objs += 1; - } else { - if (ring != msm_submit->primary) - finalize_current_cmd(ring); - nr_cmds += to_msm_ringbuffer(ring)->u.nr_cmds; - } - } - - void *obj_relocs[nr_objs]; - struct drm_msm_gem_submit_cmd cmds[nr_cmds]; - unsigned i = 0, o = 0; - - set_foreach(msm_submit->ring_set, entry) { - struct fd_ringbuffer *ring = (void *)entry->key; - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - - debug_assert(i < nr_cmds); - - // TODO handle relocs: - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - - debug_assert(o < nr_objs); - - void *relocs = handle_stateobj_relocs(msm_submit, msm_ring); - obj_relocs[o++] = relocs; - - cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; - cmds[i].submit_idx = - append_bo(msm_submit, msm_ring->ring_bo, FD_RELOC_READ); - cmds[i].submit_offset = msm_ring->offset; - cmds[i].size = offset_bytes(ring->cur, ring->start); - cmds[i].pad = 0; - cmds[i].nr_relocs = msm_ring->cmd->nr_relocs; - cmds[i].relocs = VOID2U64(relocs); - - i++; - } else { - for (unsigned j = 0; j < msm_ring->u.nr_cmds; j++) { - if (ring->flags & FD_RINGBUFFER_PRIMARY) { - cmds[i].type = MSM_SUBMIT_CMD_BUF; - } else { - cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; - } - cmds[i].submit_idx = 
append_bo(msm_submit, - msm_ring->u.cmds[j]->ring_bo, FD_RELOC_READ); - cmds[i].submit_offset = msm_ring->offset; - cmds[i].size = msm_ring->u.cmds[j]->size; - cmds[i].pad = 0; - cmds[i].nr_relocs = msm_ring->u.cmds[j]->nr_relocs; - cmds[i].relocs = VOID2U64(msm_ring->u.cmds[j]->relocs); - - i++; - } - } - } - - if (in_fence_fd != -1) { - req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; - req.fence_fd = in_fence_fd; - } - - if (out_fence_fd) { - req.flags |= MSM_SUBMIT_FENCE_FD_OUT; - } - - /* needs to be after get_cmd() as that could create bos/cmds table: */ - req.bos = VOID2U64(msm_submit->submit_bos), - req.nr_bos = msm_submit->nr_submit_bos; - req.cmds = VOID2U64(cmds), - req.nr_cmds = nr_cmds; - - DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos); - - ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT, - &req, sizeof(req)); - if (ret) { - ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); - msm_dump_submit(&req); - } else if (!ret) { - if (out_fence) - *out_fence = req.fence; - - if (out_fence_fd) - *out_fence_fd = req.fence_fd; - } - - for (unsigned o = 0; o < nr_objs; o++) - free(obj_relocs[o]); - - return ret; -} - -static void -unref_rings(struct set_entry *entry) -{ - struct fd_ringbuffer *ring = (void *)entry->key; - fd_ringbuffer_del(ring); -} - -static void -msm_submit_destroy(struct fd_submit *submit) -{ - struct msm_submit *msm_submit = to_msm_submit(submit); - - if (msm_submit->primary) - fd_ringbuffer_del(msm_submit->primary); - if (msm_submit->suballoc_ring) - fd_ringbuffer_del(msm_submit->suballoc_ring); - - _mesa_hash_table_destroy(msm_submit->bo_table, NULL); - _mesa_set_destroy(msm_submit->ring_set, unref_rings); - - // TODO it would be nice to have a way to debug_assert() if all - // rb's haven't been free'd back to the slab, because that is - // an indication that we are leaking bo's - slab_destroy(&msm_submit->ring_pool); - - for (unsigned i = 0; i < msm_submit->nr_bos; i++) - 
fd_bo_del(msm_submit->bos[i]); - - free(msm_submit->submit_bos); - free(msm_submit->bos); - free(msm_submit); -} - -static const struct fd_submit_funcs submit_funcs = { - .new_ringbuffer = msm_submit_new_ringbuffer, - .flush = msm_submit_flush, - .destroy = msm_submit_destroy, -}; - -struct fd_submit * -msm_submit_new(struct fd_pipe *pipe) -{ - struct msm_submit *msm_submit = calloc(1, sizeof(*msm_submit)); - struct fd_submit *submit; - static unsigned submit_cnt = 0; - - msm_submit->seqno = ++submit_cnt; - msm_submit->bo_table = _mesa_hash_table_create(NULL, - _mesa_hash_pointer, _mesa_key_pointer_equal); - msm_submit->ring_set = _mesa_set_create(NULL, - _mesa_hash_pointer, _mesa_key_pointer_equal); - // TODO tune size: - slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer), 16); - - submit = &msm_submit->base; - submit->pipe = pipe; - submit->funcs = &submit_funcs; - - return submit; -} - - -static void -finalize_current_cmd(struct fd_ringbuffer *ring) -{ - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - - debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); - - if (!msm_ring->cmd) - return; - - debug_assert(msm_ring->cmd->ring_bo == msm_ring->ring_bo); - - unsigned idx = APPEND(&msm_ring->u, cmds); - - msm_ring->u.cmds[idx] = msm_ring->cmd; - msm_ring->cmd = NULL; - - msm_ring->u.cmds[idx]->size = offset_bytes(ring->cur, ring->start); -} - -static void -msm_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t size) -{ - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - struct fd_pipe *pipe = msm_ring->u.submit->pipe; - - debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); - - finalize_current_cmd(ring); - - fd_bo_del(msm_ring->ring_bo); - msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0); - msm_ring->cmd = cmd_new(msm_ring->ring_bo); - - ring->start = fd_bo_map(msm_ring->ring_bo); - ring->end = &(ring->start[size/4]); - ring->cur = ring->start; - ring->size = size; -} - -static void -msm_ringbuffer_emit_reloc(struct 
fd_ringbuffer *ring, - const struct fd_reloc *reloc) -{ - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - struct fd_pipe *pipe; - unsigned reloc_idx; - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - unsigned idx = APPEND(&msm_ring->u, reloc_bos); - - msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo); - msm_ring->u.reloc_bos[idx].flags = reloc->flags; - - /* this gets fixed up at submit->flush() time, since this state- - * object rb can be used with many different submits - */ - reloc_idx = idx; - - pipe = msm_ring->u.pipe; - } else { - struct msm_submit *msm_submit = - to_msm_submit(msm_ring->u.submit); - - reloc_idx = append_bo(msm_submit, reloc->bo, reloc->flags); - - pipe = msm_ring->u.submit->pipe; - } - - struct drm_msm_gem_submit_reloc *r; - unsigned idx = APPEND(msm_ring->cmd, relocs); - - r = &msm_ring->cmd->relocs[idx]; - - r->reloc_idx = reloc_idx; - r->reloc_offset = reloc->offset; - r->or = reloc->or; - r->shift = reloc->shift; - r->submit_offset = offset_bytes(ring->cur, ring->start) + - msm_ring->offset; - - ring->cur++; - - if (pipe->gpu_id >= 500) { - idx = APPEND(msm_ring->cmd, relocs); - r = &msm_ring->cmd->relocs[idx]; - - r->reloc_idx = reloc_idx; - r->reloc_offset = reloc->offset; - r->or = reloc->orhi; - r->shift = reloc->shift - 32; - r->submit_offset = offset_bytes(ring->cur, ring->start) + - msm_ring->offset; - - ring->cur++; - } -} - -static void -append_stateobj_rings(struct msm_submit *submit, struct fd_ringbuffer *target) -{ - struct msm_ringbuffer *msm_target = to_msm_ringbuffer(target); - - debug_assert(target->flags & _FD_RINGBUFFER_OBJECT); - - set_foreach(msm_target->u.ring_set, entry) { - struct fd_ringbuffer *ring = (void *)entry->key; - - append_ring(submit->ring_set, ring); - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - append_stateobj_rings(submit, ring); - } - } -} - -static uint32_t -msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring, - struct fd_ringbuffer *target, uint32_t cmd_idx) -{ - struct 
msm_ringbuffer *msm_target = to_msm_ringbuffer(target); - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - struct fd_bo *bo; - uint32_t size; - - if ((target->flags & FD_RINGBUFFER_GROWABLE) && - (cmd_idx < msm_target->u.nr_cmds)) { - bo = msm_target->u.cmds[cmd_idx]->ring_bo; - size = msm_target->u.cmds[cmd_idx]->size; - } else { - bo = msm_target->ring_bo; - size = offset_bytes(target->cur, target->start); - } - - msm_ringbuffer_emit_reloc(ring, &(struct fd_reloc){ - .bo = bo, - .flags = FD_RELOC_READ, - .offset = msm_target->offset, - }); - - if ((target->flags & _FD_RINGBUFFER_OBJECT) && - !(ring->flags & _FD_RINGBUFFER_OBJECT)) { - struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); - - append_stateobj_rings(msm_submit, target); - } - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - append_ring(msm_ring->u.ring_set, target); - } else { - struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); - append_ring(msm_submit->ring_set, target); - } - - return size; -} - -static uint32_t -msm_ringbuffer_cmd_count(struct fd_ringbuffer *ring) -{ - if (ring->flags & FD_RINGBUFFER_GROWABLE) - return to_msm_ringbuffer(ring)->u.nr_cmds + 1; - return 1; -} - -static void -msm_ringbuffer_destroy(struct fd_ringbuffer *ring) -{ - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - - fd_bo_del(msm_ring->ring_bo); - if (msm_ring->cmd) - cmd_free(msm_ring->cmd); - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { - fd_bo_del(msm_ring->u.reloc_bos[i].bo); - } - - _mesa_set_destroy(msm_ring->u.ring_set, unref_rings); - - free(msm_ring->u.reloc_bos); - free(msm_ring); - } else { - struct fd_submit *submit = msm_ring->u.submit; - - for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { - cmd_free(msm_ring->u.cmds[i]); - } - - free(msm_ring->u.cmds); - slab_free_st(&to_msm_submit(submit)->ring_pool, msm_ring); - } -} - -static const struct fd_ringbuffer_funcs ring_funcs = { - .grow = 
msm_ringbuffer_grow, - .emit_reloc = msm_ringbuffer_emit_reloc, - .emit_reloc_ring = msm_ringbuffer_emit_reloc_ring, - .cmd_count = msm_ringbuffer_cmd_count, - .destroy = msm_ringbuffer_destroy, -}; - -static inline struct fd_ringbuffer * -msm_ringbuffer_init(struct msm_ringbuffer *msm_ring, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - struct fd_ringbuffer *ring = &msm_ring->base; - - debug_assert(msm_ring->ring_bo); - - uint8_t *base = fd_bo_map(msm_ring->ring_bo); - ring->start = (void *)(base + msm_ring->offset); - ring->end = &(ring->start[size/4]); - ring->cur = ring->start; - - ring->size = size; - ring->flags = flags; - - ring->funcs = &ring_funcs; - - msm_ring->u.cmds = NULL; - msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0; - - msm_ring->cmd = cmd_new(msm_ring->ring_bo); - - return ring; -} - -struct fd_ringbuffer * -msm_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size) -{ - struct msm_ringbuffer *msm_ring = malloc(sizeof(*msm_ring)); - - msm_ring->u.pipe = pipe; - msm_ring->offset = 0; - msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0); - msm_ring->base.refcnt = 1; - - msm_ring->u.reloc_bos = NULL; - msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0; - - msm_ring->u.ring_set = _mesa_set_create(NULL, - _mesa_hash_pointer, _mesa_key_pointer_equal); - - return msm_ringbuffer_init(msm_ring, size, _FD_RINGBUFFER_OBJECT); -} diff --git a/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.c b/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.c deleted file mode 100644 index 2b8f53172..000000000 --- a/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.c +++ /dev/null @@ -1,568 +0,0 @@ -/* - * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, 
sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <robclark@freedesktop.org> - */ - -#include <assert.h> -#include <inttypes.h> - -#include "util/hash_table.h" -#include "util/slab.h" - -#include "drm/freedreno_ringbuffer.h" -#include "msm_priv.h" - -/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead - * by avoiding the additional tracking necessary to build cmds/relocs tables - * (but still builds a bos table) - */ - - -#define INIT_SIZE 0x1000 - -static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER; - - -struct msm_submit_sp { - struct fd_submit base; - - DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos); - DECLARE_ARRAY(struct fd_bo *, bos); - - unsigned seqno; - - /* maps fd_bo to idx in bos table: */ - struct hash_table *bo_table; - - struct slab_mempool ring_pool; - - struct fd_ringbuffer *primary; - - /* Allow for sub-allocation of stateobj ring buffers (ie. sharing - * the same underlying bo).. - * - * We also rely on previous stateobj having been fully constructed - * so we can reclaim extra space at it's end. 
- */ - struct fd_ringbuffer *suballoc_ring; -}; -FD_DEFINE_CAST(fd_submit, msm_submit_sp); - -/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers - * and sizes. Ie. a finalized buffer can have no more commands appended to - * it. - */ -struct msm_cmd_sp { - struct fd_bo *ring_bo; - unsigned size; -}; - -/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to - * later copy into the submit when the stateobj rb is later referenced by - * a regular rb: - */ -struct msm_reloc_bo_sp { - struct fd_bo *bo; - unsigned flags; -}; - -struct msm_ringbuffer_sp { - struct fd_ringbuffer base; - - /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ - unsigned offset; - -// TODO check disasm.. hopefully compilers CSE can realize that -// reloc_bos and cmds are at the same offsets and optimize some -// divergent cases into single case - union { - /* for _FD_RINGBUFFER_OBJECT case: */ - struct { - struct fd_pipe *pipe; - DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos); - }; - /* for other cases: */ - struct { - struct fd_submit *submit; - DECLARE_ARRAY(struct msm_cmd_sp, cmds); - }; - } u; - - struct fd_bo *ring_bo; -}; -FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp); - -static void finalize_current_cmd(struct fd_ringbuffer *ring); -static struct fd_ringbuffer * msm_ringbuffer_sp_init( - struct msm_ringbuffer_sp *msm_ring, - uint32_t size, enum fd_ringbuffer_flags flags); - -/* add (if needed) bo to submit and return index: */ -static uint32_t -append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags) -{ - struct msm_bo *msm_bo = to_msm_bo(bo); - uint32_t idx; - pthread_mutex_lock(&idx_lock); - if (likely(msm_bo->current_submit_seqno == submit->seqno)) { - idx = msm_bo->idx; - } else { - uint32_t hash = _mesa_hash_pointer(bo); - struct hash_entry *entry; - - entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); - if (entry) { - /* found */ - idx = (uint32_t)(uintptr_t)entry->data; - } else { - 
idx = APPEND(submit, submit_bos); - idx = APPEND(submit, bos); - - submit->submit_bos[idx].flags = 0; - submit->submit_bos[idx].handle = bo->handle; - submit->submit_bos[idx].presumed = 0; - - submit->bos[idx] = fd_bo_ref(bo); - - _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, - (void *)(uintptr_t)idx); - } - msm_bo->current_submit_seqno = submit->seqno; - msm_bo->idx = idx; - } - pthread_mutex_unlock(&idx_lock); - if (flags & FD_RELOC_READ) - submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ; - if (flags & FD_RELOC_WRITE) - submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE; - if (flags & FD_RELOC_DUMP) - submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_DUMP; - return idx; -} - -static void -msm_submit_suballoc_ring_bo(struct fd_submit *submit, - struct msm_ringbuffer_sp *msm_ring, uint32_t size) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - unsigned suballoc_offset = 0; - struct fd_bo *suballoc_bo = NULL; - - if (msm_submit->suballoc_ring) { - struct msm_ringbuffer_sp *suballoc_ring = - to_msm_ringbuffer_sp(msm_submit->suballoc_ring); - - suballoc_bo = suballoc_ring->ring_bo; - suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) + - suballoc_ring->offset; - - suballoc_offset = align(suballoc_offset, 0x10); - - if ((size + suballoc_offset) > suballoc_bo->size) { - suballoc_bo = NULL; - } - } - - if (!suballoc_bo) { - // TODO possibly larger size for streaming bo? 
- msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, - 0x8000, DRM_FREEDRENO_GEM_GPUREADONLY); - msm_ring->offset = 0; - } else { - msm_ring->ring_bo = fd_bo_ref(suballoc_bo); - msm_ring->offset = suballoc_offset; - } - - struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; - - msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); - - if (old_suballoc_ring) - fd_ringbuffer_del(old_suballoc_ring); -} - -static struct fd_ringbuffer * -msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - struct msm_ringbuffer_sp *msm_ring; - - msm_ring = slab_alloc_st(&msm_submit->ring_pool); - - msm_ring->u.submit = submit; - - /* NOTE: needs to be before _suballoc_ring_bo() since it could - * increment the refcnt of the current ring - */ - msm_ring->base.refcnt = 1; - - if (flags & FD_RINGBUFFER_STREAMING) { - msm_submit_suballoc_ring_bo(submit, msm_ring, size); - } else { - if (flags & FD_RINGBUFFER_GROWABLE) - size = INIT_SIZE; - - msm_ring->offset = 0; - msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, - DRM_FREEDRENO_GEM_GPUREADONLY); - } - - if (!msm_ringbuffer_sp_init(msm_ring, size, flags)) - return NULL; - - if (flags & FD_RINGBUFFER_PRIMARY) { - debug_assert(!msm_submit->primary); - msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base); - } - - return &msm_ring->base; -} - -static int -msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, - int *out_fence_fd, uint32_t *out_fence) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe); - struct drm_msm_gem_submit req = { - .flags = msm_pipe->pipe, - .queueid = msm_pipe->queue_id, - }; - int ret; - - debug_assert(msm_submit->primary); - finalize_current_cmd(msm_submit->primary); - - struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary); - struct drm_msm_gem_submit_cmd 
cmds[primary->u.nr_cmds]; - - for (unsigned i = 0; i < primary->u.nr_cmds; i++) { - cmds[i].type = MSM_SUBMIT_CMD_BUF; - cmds[i].submit_idx = append_bo(msm_submit, - primary->u.cmds[i].ring_bo, FD_RELOC_READ | FD_RELOC_DUMP); - cmds[i].submit_offset = primary->offset; - cmds[i].size = primary->u.cmds[i].size; - cmds[i].pad = 0; - cmds[i].nr_relocs = 0; - } - - if (in_fence_fd != -1) { - req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; - req.fence_fd = in_fence_fd; - } - - if (out_fence_fd) { - req.flags |= MSM_SUBMIT_FENCE_FD_OUT; - } - - /* needs to be after get_cmd() as that could create bos/cmds table: */ - req.bos = VOID2U64(msm_submit->submit_bos), - req.nr_bos = msm_submit->nr_submit_bos; - req.cmds = VOID2U64(cmds), - req.nr_cmds = primary->u.nr_cmds; - - DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos); - - ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT, - &req, sizeof(req)); - if (ret) { - ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); - msm_dump_submit(&req); - } else if (!ret) { - if (out_fence) - *out_fence = req.fence; - - if (out_fence_fd) - *out_fence_fd = req.fence_fd; - } - - return ret; -} - -static void -msm_submit_sp_destroy(struct fd_submit *submit) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - - if (msm_submit->primary) - fd_ringbuffer_del(msm_submit->primary); - if (msm_submit->suballoc_ring) - fd_ringbuffer_del(msm_submit->suballoc_ring); - - _mesa_hash_table_destroy(msm_submit->bo_table, NULL); - - // TODO it would be nice to have a way to debug_assert() if all - // rb's haven't been free'd back to the slab, because that is - // an indication that we are leaking bo's - slab_destroy(&msm_submit->ring_pool); - - for (unsigned i = 0; i < msm_submit->nr_bos; i++) - fd_bo_del(msm_submit->bos[i]); - - free(msm_submit->submit_bos); - free(msm_submit->bos); - free(msm_submit); -} - -static const struct fd_submit_funcs submit_funcs = { - .new_ringbuffer = 
msm_submit_sp_new_ringbuffer, - .flush = msm_submit_sp_flush, - .destroy = msm_submit_sp_destroy, -}; - -struct fd_submit * -msm_submit_sp_new(struct fd_pipe *pipe) -{ - struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit)); - struct fd_submit *submit; - static unsigned submit_cnt = 0; - - msm_submit->seqno = ++submit_cnt; - msm_submit->bo_table = _mesa_hash_table_create(NULL, - _mesa_hash_pointer, _mesa_key_pointer_equal); - // TODO tune size: - slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer_sp), 16); - - submit = &msm_submit->base; - submit->pipe = pipe; - submit->funcs = &submit_funcs; - - return submit; -} - - -static void -finalize_current_cmd(struct fd_ringbuffer *ring) -{ - debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); - - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - unsigned idx = APPEND(&msm_ring->u, cmds); - - msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo); - msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start); -} - -static void -msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size) -{ - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - struct fd_pipe *pipe = msm_ring->u.submit->pipe; - - debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); - - finalize_current_cmd(ring); - - fd_bo_del(msm_ring->ring_bo); - msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, - DRM_FREEDRENO_GEM_GPUREADONLY); - - ring->start = fd_bo_map(msm_ring->ring_bo); - ring->end = &(ring->start[size/4]); - ring->cur = ring->start; - ring->size = size; -} - -static void -msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring, - const struct fd_reloc *reloc) -{ - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - struct fd_pipe *pipe; - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - unsigned idx = APPEND(&msm_ring->u, reloc_bos); - - msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo); - msm_ring->u.reloc_bos[idx].flags = reloc->flags; - - pipe = 
msm_ring->u.pipe; - } else { - struct msm_submit_sp *msm_submit = - to_msm_submit_sp(msm_ring->u.submit); - - append_bo(msm_submit, reloc->bo, reloc->flags); - - pipe = msm_ring->u.submit->pipe; - } - - uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset; - uint32_t dword = iova; - int shift = reloc->shift; - - if (shift < 0) - dword >>= -shift; - else - dword <<= shift; - - (*ring->cur++) = dword | reloc->or; - - if (pipe->gpu_id >= 500) { - dword = iova >> 32; - shift -= 32; - - if (shift < 0) - dword >>= -shift; - else - dword <<= shift; - - (*ring->cur++) = dword | reloc->orhi; - } -} - -static uint32_t -msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring, - struct fd_ringbuffer *target, uint32_t cmd_idx) -{ - struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target); - struct fd_bo *bo; - uint32_t size; - - if ((target->flags & FD_RINGBUFFER_GROWABLE) && - (cmd_idx < msm_target->u.nr_cmds)) { - bo = msm_target->u.cmds[cmd_idx].ring_bo; - size = msm_target->u.cmds[cmd_idx].size; - } else { - bo = msm_target->ring_bo; - size = offset_bytes(target->cur, target->start); - } - - msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){ - .bo = bo, - .flags = FD_RELOC_READ | FD_RELOC_DUMP, - .offset = msm_target->offset, - }); - - if (!(target->flags & _FD_RINGBUFFER_OBJECT)) - return size; - - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { - unsigned idx = APPEND(&msm_ring->u, reloc_bos); - - msm_ring->u.reloc_bos[idx].bo = - fd_bo_ref(msm_target->u.reloc_bos[i].bo); - msm_ring->u.reloc_bos[idx].flags = - msm_target->u.reloc_bos[i].flags; - } - } else { - // TODO it would be nice to know whether we have already - // seen this target before. 
But hopefully we hit the - // append_bo() fast path enough for this to not matter: - struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit); - - for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { - append_bo(msm_submit, msm_target->u.reloc_bos[i].bo, - msm_target->u.reloc_bos[i].flags); - } - } - - return size; -} - -static uint32_t -msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring) -{ - if (ring->flags & FD_RINGBUFFER_GROWABLE) - return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1; - return 1; -} - -static void -msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring) -{ - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - - fd_bo_del(msm_ring->ring_bo); - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { - fd_bo_del(msm_ring->u.reloc_bos[i].bo); - } - - free(msm_ring); - } else { - struct fd_submit *submit = msm_ring->u.submit; - - for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { - fd_bo_del(msm_ring->u.cmds[i].ring_bo); - } - - slab_free_st(&to_msm_submit_sp(submit)->ring_pool, msm_ring); - } -} - -static const struct fd_ringbuffer_funcs ring_funcs = { - .grow = msm_ringbuffer_sp_grow, - .emit_reloc = msm_ringbuffer_sp_emit_reloc, - .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring, - .cmd_count = msm_ringbuffer_sp_cmd_count, - .destroy = msm_ringbuffer_sp_destroy, -}; - -static inline struct fd_ringbuffer * -msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - struct fd_ringbuffer *ring = &msm_ring->base; - - debug_assert(msm_ring->ring_bo); - - uint8_t *base = fd_bo_map(msm_ring->ring_bo); - ring->start = (void *)(base + msm_ring->offset); - ring->end = &(ring->start[size/4]); - ring->cur = ring->start; - - ring->size = size; - ring->flags = flags; - - ring->funcs = &ring_funcs; - - // TODO initializing these could probably be conditional on flags - // since unneed for FD_RINGBUFFER_STAGING case.. 
- msm_ring->u.cmds = NULL; - msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0; - - msm_ring->u.reloc_bos = NULL; - msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0; - - return ring; -} - -struct fd_ringbuffer * -msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size) -{ - struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring)); - - msm_ring->u.pipe = pipe; - msm_ring->offset = 0; - msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, - DRM_FREEDRENO_GEM_GPUREADONLY); - msm_ring->base.refcnt = 1; - - return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT); -} diff --git a/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.h b/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.h deleted file mode 100644 index 79dba3b84..000000000 --- a/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright © 2021 Google, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifdef X -# undef X -#endif - -#if PTRSZ == 32 -# define X(n) n ## _32 -#else -# define X(n) n ## _64 -#endif - - -static void -X(emit_reloc_common)(struct fd_ringbuffer *ring, const struct fd_reloc *reloc) -{ - (*ring->cur++) = (uint32_t)reloc->iova; -#if PTRSZ == 64 - (*ring->cur++) = (uint32_t)(reloc->iova >> 32); -#endif -} - -static void -X(msm_ringbuffer_sp_emit_reloc_nonobj)(struct fd_ringbuffer *ring, - const struct fd_reloc *reloc) -{ - X(emit_reloc_common)(ring, reloc); - - assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); - - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - - struct msm_submit_sp *msm_submit = - to_msm_submit_sp(msm_ring->u.submit); - - msm_submit_append_bo(msm_submit, reloc->bo); -} - -static void -X(msm_ringbuffer_sp_emit_reloc_obj)(struct fd_ringbuffer *ring, - const struct fd_reloc *reloc) -{ - X(emit_reloc_common)(ring, reloc); - - assert(ring->flags & _FD_RINGBUFFER_OBJECT); - - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - - /* Avoid emitting duplicate BO references into the list. Ringbuffer - * objects are long-lived, so this saves ongoing work at draw time in - * exchange for a bit at context setup/first draw. And the number of - * relocs per ringbuffer object is fairly small, so the O(n^2) doesn't - * hurt much. 
- */ - bool found = false; - for (int i = 0; i < msm_ring->u.nr_reloc_bos; i++) { - if (msm_ring->u.reloc_bos[i] == reloc->bo) { - found = true; - break; - } - } - if (!found) { - APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(reloc->bo)); - } -} - -static uint32_t -X(msm_ringbuffer_sp_emit_reloc_ring)(struct fd_ringbuffer *ring, - struct fd_ringbuffer *target, uint32_t cmd_idx) -{ - struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target); - struct fd_bo *bo; - uint32_t size; - - if ((target->flags & FD_RINGBUFFER_GROWABLE) && - (cmd_idx < msm_target->u.nr_cmds)) { - bo = msm_target->u.cmds[cmd_idx].ring_bo; - size = msm_target->u.cmds[cmd_idx].size; - } else { - bo = msm_target->ring_bo; - size = offset_bytes(target->cur, target->start); - } - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - X(msm_ringbuffer_sp_emit_reloc_obj)(ring, &(struct fd_reloc){ - .bo = bo, - .iova = bo->iova + msm_target->offset, - .offset = msm_target->offset, - }); - } else { - X(msm_ringbuffer_sp_emit_reloc_nonobj)(ring, &(struct fd_reloc){ - .bo = bo, - .iova = bo->iova + msm_target->offset, - .offset = msm_target->offset, - }); - } - - if (!(target->flags & _FD_RINGBUFFER_OBJECT)) - return size; - - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { - APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(msm_target->u.reloc_bos[i])); - } - } else { - // TODO it would be nice to know whether we have already - // seen this target before. 
But hopefully we hit the - // append_bo() fast path enough for this to not matter: - struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit); - - for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { - msm_submit_append_bo(msm_submit, msm_target->u.reloc_bos[i]); - } - } - - return size; -} diff --git a/lib/mesa/src/freedreno/fdl/fd6_layout_test.c b/lib/mesa/src/freedreno/fdl/fd6_layout_test.c index 91639843d..f4eda1135 100644 --- a/lib/mesa/src/freedreno/fdl/fd6_layout_test.c +++ b/lib/mesa/src/freedreno/fdl/fd6_layout_test.c @@ -683,6 +683,243 @@ static const struct testcase }, }, }, + + /* Easy 32x32x32 3d case */ + { + .format = PIPE_FORMAT_R9G9B9E5_FLOAT, + .is_3d = true, + .layout = + { + .tile_mode = TILE6_3, + .ubwc = false, + .width0 = 32, + .height0 = 32, + .depth0 = 32, + .slices = + { + {.offset = 0, .pitch = 256, .size0 = 8192}, + {.offset = 262144, .pitch = 256, .size0 = 4096}, + {.offset = 327680, .pitch = 256, .size0 = 4096}, + {.offset = 360448, .pitch = 256, .size0 = 4096}, + {.offset = 376832, .pitch = 256, .size0 = 4096}, + {.offset = 385024, .pitch = 256}, + }, + }, + }, + + /* Scale up a bit to 128x128x32 3d */ + { + .format = PIPE_FORMAT_R9G9B9E5_FLOAT, + .is_3d = true, + .layout = + { + .tile_mode = TILE6_3, + .ubwc = false, + .width0 = 128, + .height0 = 128, + .depth0 = 32, + .slices = + { + {.offset = 0, .pitch = 512, .size0 = 65536}, + {.offset = 2097152, .pitch = 256, .size0 = 16384}, + {.offset = 2359296, .pitch = 256, .size0 = 8192}, + {.offset = 2424832, .pitch = 256, .size0 = 8192}, + {.offset = 2457600, .pitch = 256, .size0 = 8192}, + {.offset = 2473984, .pitch = 256}, + {.offset = 2482176, .pitch = 256}, + {.offset = 2490368, .pitch = 256}, + }, + }, + }, + + /* Changing width to 1 changes where minimum layer size happens. 
*/ + { + .format = PIPE_FORMAT_R9G9B9E5_FLOAT, + .is_3d = true, + .layout = + { + .tile_mode = TILE6_LINEAR, + .ubwc = false, + .width0 = 1, + .height0 = 128, + .depth0 = 32, + .slices = + { + {.offset = 0, .pitch = 256, .size0 = 32768}, + {.offset = 1048576, .pitch = 256, .size0 = 16384}, + {.offset = 1310720, .pitch = 256, .size0 = 16384}, + {.offset = 1441792, .pitch = 256, .size0 = 16384}, + {.offset = 1507328, .pitch = 256, .size0 = 16384}, + {.offset = 1540096, .pitch = 256}, + {.offset = 1556480, .pitch = 256}, + {.offset = 1572864, .pitch = 256}, + }, + }, + }, + + /* And increasing width makes it happen later. */ + { + .format = PIPE_FORMAT_R9G9B9E5_FLOAT, + .is_3d = true, + .layout = + { + .tile_mode = TILE6_3, + .ubwc = false, + .width0 = 1024, + .height0 = 128, + .depth0 = 32, + .slices = + { + {.offset = 0, .pitch = 4096, .size0 = 524288}, + {.offset = 16777216, .pitch = 2048, .size0 = 131072}, + {.offset = 18874368, .pitch = 1024, .size0 = 32768}, + {.offset = 19136512, .pitch = 512, .size0 = 8192}, + {.offset = 19169280, .pitch = 256, .size0 = 4096}, + {.offset = 19177472, .pitch = 256}, + {.offset = 19181568, .pitch = 256}, + {.offset = 19185664, .pitch = 256}, + {.offset = 19189760, .pitch = 256}, + {.offset = 19193856, .pitch = 256}, + {.offset = 19197952, .pitch = 256}, + }, + }, + }, + + /* NPOT height case that piglit was catching 3d texture failure in, we + * use a higher depth though to get more slice pitches detected from + * the blob. 
+ */ + { + .format = PIPE_FORMAT_R9G9B9E5_FLOAT, + .is_3d = true, + .layout = + { + .tile_mode = TILE6_3, + .ubwc = false, + .width0 = 128, + .height0 = 129, + .depth0 = 16, + .slices = + { + {.offset = 0, .pitch = 512, .size0 = 73728}, + {.offset = 1179648, .pitch = 256, .size0 = 20480}, + {.offset = 1343488, .pitch = 256, .size0 = 20480}, + {.offset = 1425408, .pitch = 256, .size0 = 20480}, + {.offset = 1466368, .pitch = 256}, + {.offset = 1486848, .pitch = 256}, + {.offset = 1507328, .pitch = 256}, + {.offset = 1527808, .pitch = 256}, + }, + }, + }, + + /* NPOT height case that my first 3d layout ideas failed on. */ + { + .format = PIPE_FORMAT_R9G9B9E5_FLOAT, + .is_3d = true, + .layout = + { + .tile_mode = TILE6_3, + .ubwc = false, + .width0 = 128, + .height0 = 132, + .depth0 = 16, + .slices = + { + {.offset = 0, .pitch = 512, .size0 = 73728}, + {.offset = 1179648, .pitch = 256, .size0 = 20480}, + {.offset = 1343488, .pitch = 256, .size0 = 20480}, + {.offset = 1425408, .pitch = 256, .size0 = 20480}, + {.offset = 1466368, .pitch = 256}, + {.offset = 1486848, .pitch = 256}, + {.offset = 1507328, .pitch = 256}, + {.offset = 1527808, .pitch = 256}, + }, + }, + }, + + /* blob used MIN_LAYERSZ = 0x3000 here. + * + * This is an interesting case for 3d layout, since pitch stays NPOT for a while. 
+ */ + { + .format = PIPE_FORMAT_R9G9B9E5_FLOAT, + .is_3d = true, + .layout = + { + .tile_mode = TILE6_3, + .ubwc = false, + .width0 = 768, + .height0 = 32, + .depth0 = 128, + .slices = + { + {.offset = 0, .pitch = 3072, .size0 = 98304}, + {.offset = 12582912, .pitch = 1536, .size0 = 24576}, + {.offset = 14155776, .pitch = 768, .size0 = 12288}, + {.offset = 14548992, .pitch = 512, .size0 = 12288}, + {.offset = 14745600, .pitch = 256, .size0 = 12288}, + {.offset = 14843904, .pitch = 256, .size0 = 12288}, + {.offset = 14893056, .pitch = 256, .size0 = 12288}, + {.offset = 14917632, .pitch = 256}, + {.offset = 14929920, .pitch = 256}, + {.offset = 14942208, .pitch = 256}, + }, + }, + }, + + /* dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.rgba32f_rg11_eac.texture3d_to_texture2d */ +#if 0 /* XXX: We disagree with the blob about level 0 size0, but the testcase passes. */ + { + .format = PIPE_FORMAT_R32G32B32A32_FLOAT, + .is_3d = true, + .layout = + { + .tile_mode = TILE6_3, + .ubwc = false, + .width0 = 129, + .height0 = 129, + .depth0 = 17, + .slices = + { + {.offset = 0, .pitch = 3072, .size0 = 524288}, + {.offset = 8912896, .pitch = 2048, .size0 = 131072}, + {.offset = 9961472, .pitch = 1024, .size0 = 32768}, + {.offset = 10092544, .pitch = 1024, .size0 = 16384}, + {.offset = 10125312, .pitch = 1024}, + {.offset = 10141696, .pitch = 1024}, + {.offset = 10158080, .pitch = 1024}, + {.offset = 10174464, .pitch = 1024}, + }, + }, + }, +#endif + + /* Size minification issue found while looking at the above test. 
*/ + { + .format = PIPE_FORMAT_R32G32B32A32_FLOAT, + .is_3d = true, + .layout = + { + .tile_mode = TILE6_3, + .ubwc = false, + .width0 = 129, + .height0 = 9, + .depth0 = 8, + .slices = + { + {.offset = 0, .pitch = 3072, .size0 = 49152}, + {.offset = 393216, .pitch = 2048, .size0 = 32768}, + {.offset = 524288, .pitch = 1024, .size0 = 32768}, + {.offset = 589824, .pitch = 1024}, + {.offset = 622592, .pitch = 1024}, + {.offset = 655360, .pitch = 1024}, + {.offset = 688128, .pitch = 1024}, + {.offset = 720896, .pitch = 1024}, + }, + }, + }, + }; int diff --git a/lib/mesa/src/freedreno/ir3/ir3_dce.c b/lib/mesa/src/freedreno/ir3/ir3_dce.c index 76298e64a..a3ddbe802 100644 --- a/lib/mesa/src/freedreno/ir3/ir3_dce.c +++ b/lib/mesa/src/freedreno/ir3/ir3_dce.c @@ -53,8 +53,10 @@ instr_dce(struct ir3_instruction *instr, bool falsedep) if (ir3_instr_check_mark(instr)) return; - if (writes_gpr(instr)) - mark_array_use(instr, instr->dsts[0]); /* dst */ + foreach_dst (dst, instr) { + if (is_dest_gpr(dst)) + mark_array_use(instr, dst); + } foreach_src (reg, instr) mark_array_use(instr, reg); /* src */ diff --git a/lib/mesa/src/freedreno/ir3/ir3_delay.c b/lib/mesa/src/freedreno/ir3/ir3_delay.c index 14bb403b9..054f4c831 100644 --- a/lib/mesa/src/freedreno/ir3/ir3_delay.c +++ b/lib/mesa/src/freedreno/ir3/ir3_delay.c @@ -30,19 +30,6 @@ */ #define MAX_NOPS 6 -/* The soft delay for approximating the cost of (ss). On a6xx, it takes the - * number of delay slots to get a SFU result back (ie. using nop's instead of - * (ss) is: - * - * 8 - single warp - * 9 - two warps - * 10 - four warps - * - * and so on. Not quite sure where it tapers out (ie. how many warps share an - * SFU unit). But 10 seems like a reasonable # to choose: - */ -#define SOFT_SS_NOPS 10 - /* * Helpers to figure out the necessary delay slots between instructions. 
Used * both in scheduling pass(es) and the final pass to insert any required nop's @@ -76,11 +63,11 @@ ir3_delayslots(struct ir3_instruction *assigner, if (writes_addr0(assigner) || writes_addr1(assigner)) return 6; - if (soft && is_sfu(assigner)) - return SOFT_SS_NOPS; + if (soft && is_ss_producer(assigner)) + return soft_ss_delay(assigner); /* handled via sync flags: */ - if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner)) + if (is_ss_producer(assigner) || is_sy_producer(assigner)) return 0; /* As far as we know, shader outputs don't need any delay. */ @@ -89,7 +76,7 @@ ir3_delayslots(struct ir3_instruction *assigner, /* assigner must be alu: */ if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) || - is_mem(consumer) || (assigner->dsts[0]->flags & IR3_REG_SHARED)) { + is_mem(consumer)) { return 6; } else { /* In mergedregs mode, there is an extra 2-cycle penalty when half of @@ -119,74 +106,6 @@ count_instruction(struct ir3_instruction *n) (is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_B)); } -static unsigned -distance(struct ir3_block *block, struct ir3_instruction *instr, unsigned maxd) -{ - unsigned d = 0; - - /* Note that this relies on incrementally building up the block's - * instruction list.. but this is how scheduling and nopsched - * work. 
- */ - foreach_instr_rev (n, &block->instr_list) { - if ((n == instr) || (d >= maxd)) - return MIN2(maxd, d + n->nop); - if (count_instruction(n)) - d = MIN2(maxd, d + 1 + n->repeat + n->nop); - } - - return maxd; -} - -static unsigned -delay_calc_srcn_prera(struct ir3_block *block, struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned srcn) -{ - unsigned delay = 0; - - if (assigner->opc == OPC_META_PHI) - return 0; - - if (is_meta(assigner)) { - foreach_src_n (src, n, assigner) { - unsigned d; - - if (!src->def) - continue; - - d = delay_calc_srcn_prera(block, src->def->instr, consumer, srcn); - delay = MAX2(delay, d); - } - } else { - delay = ir3_delayslots(assigner, consumer, srcn, false); - delay -= distance(block, assigner, delay); - } - - return delay; -} - -/** - * Calculate delay for instruction before register allocation, using SSA - * source pointers. This can't handle inter-block dependencies. - */ -unsigned -ir3_delay_calc_prera(struct ir3_block *block, struct ir3_instruction *instr) -{ - unsigned delay = 0; - - foreach_src_n (src, i, instr) { - unsigned d = 0; - - if (src->def && src->def->instr->block == block) { - d = delay_calc_srcn_prera(block, src->def->instr, instr, i); - } - - delay = MAX2(delay, d); - } - - return delay; -} - /* Post-RA, we don't have arrays any more, so we have to be a bit careful here * and have to handle relative accesses specially. 
*/ @@ -207,35 +126,21 @@ post_ra_reg_num(struct ir3_register *reg) return reg->num; } -static unsigned -delay_calc_srcn_postra(struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned assigner_n, - unsigned consumer_n, bool soft, bool mergedregs) +unsigned +ir3_delayslots_with_repeat(struct ir3_instruction *assigner, + struct ir3_instruction *consumer, + unsigned assigner_n, unsigned consumer_n) { + unsigned delay = ir3_delayslots(assigner, consumer, consumer_n, false); + struct ir3_register *src = consumer->srcs[consumer_n]; struct ir3_register *dst = assigner->dsts[assigner_n]; - bool mismatched_half = - (src->flags & IR3_REG_HALF) != (dst->flags & IR3_REG_HALF); - /* In the mergedregs case or when the register is a special register, - * half-registers do not alias with full registers. - */ - if ((!mergedregs || is_reg_special(src) || is_reg_special(dst)) && - mismatched_half) - return 0; + if (assigner->repeat == 0 && consumer->repeat == 0) + return delay; unsigned src_start = post_ra_reg_num(src) * reg_elem_size(src); - unsigned src_end = src_start + post_ra_reg_elems(src) * reg_elem_size(src); unsigned dst_start = post_ra_reg_num(dst) * reg_elem_size(dst); - unsigned dst_end = dst_start + post_ra_reg_elems(dst) * reg_elem_size(dst); - - if (dst_start >= src_end || src_start >= dst_end) - return 0; - - unsigned delay = ir3_delayslots(assigner, consumer, consumer_n, soft); - - if (assigner->repeat == 0 && consumer->repeat == 0) - return delay; /* If either side is a relative access, we can't really apply most of the * reasoning below because we don't know which component aliases which. @@ -250,6 +155,9 @@ delay_calc_srcn_postra(struct ir3_instruction *assigner, if (assigner->opc == OPC_MOVMSK) return delay; + bool mismatched_half = + (src->flags & IR3_REG_HALF) != (dst->flags & IR3_REG_HALF); + /* TODO: Handle the combination of (rpt) and different component sizes * better like below. 
This complicates things significantly because the * components don't line up. @@ -303,10 +211,41 @@ delay_calc_srcn_postra(struct ir3_instruction *assigner, } static unsigned -delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start, - struct ir3_instruction *consumer, unsigned distance, - bool soft, bool pred, bool mergedregs) +delay_calc_srcn(struct ir3_instruction *assigner, + struct ir3_instruction *consumer, unsigned assigner_n, + unsigned consumer_n, bool mergedregs) +{ + struct ir3_register *src = consumer->srcs[consumer_n]; + struct ir3_register *dst = assigner->dsts[assigner_n]; + bool mismatched_half = + (src->flags & IR3_REG_HALF) != (dst->flags & IR3_REG_HALF); + + /* In the mergedregs case or when the register is a special register, + * half-registers do not alias with full registers. + */ + if ((!mergedregs || is_reg_special(src) || is_reg_special(dst)) && + mismatched_half) + return 0; + + unsigned src_start = post_ra_reg_num(src) * reg_elem_size(src); + unsigned src_end = src_start + post_ra_reg_elems(src) * reg_elem_size(src); + unsigned dst_start = post_ra_reg_num(dst) * reg_elem_size(dst); + unsigned dst_end = dst_start + post_ra_reg_elems(dst) * reg_elem_size(dst); + + if (dst_start >= src_end || src_start >= dst_end) + return 0; + + return ir3_delayslots_with_repeat(assigner, consumer, assigner_n, consumer_n); +} + +static unsigned +delay_calc(struct ir3_block *block, struct ir3_instruction *start, + struct ir3_instruction *consumer, unsigned distance, + regmask_t *in_mask, bool mergedregs) { + regmask_t mask; + memcpy(&mask, in_mask, sizeof(mask)); + unsigned delay = 0; /* Search backwards starting at the instruction before start, unless it's * NULL then search backwards from the block end. @@ -318,7 +257,7 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start, if (count_instruction(assigner)) distance += assigner->nop; - if (distance + delay >= (soft ? 
SOFT_SS_NOPS : MAX_NOPS)) + if (distance + delay >= MAX_NOPS) return delay; if (is_meta(assigner)) @@ -329,14 +268,17 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start, foreach_dst_n (dst, dst_n, assigner) { if (dst->wrmask == 0) continue; + if (!regmask_get(&mask, dst)) + continue; foreach_src_n (src, src_n, consumer) { if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) continue; - unsigned src_delay = delay_calc_srcn_postra( - assigner, consumer, dst_n, src_n, soft, mergedregs); + unsigned src_delay = delay_calc_srcn( + assigner, consumer, dst_n, src_n, mergedregs); new_delay = MAX2(new_delay, src_delay); } + regmask_clear(&mask, dst); } new_delay = new_delay > distance ? new_delay - distance : 0; @@ -360,13 +302,13 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start, * However any other recursion would be unnecessary. */ - if (pred && block->data != block) { + if (block->data != block) { block->data = block; for (unsigned i = 0; i < block->predecessors_count; i++) { struct ir3_block *pred = block->predecessors[i]; - unsigned pred_delay = delay_calc_postra(pred, NULL, consumer, distance, - soft, pred, mergedregs); + unsigned pred_delay = delay_calc(pred, NULL, consumer, distance, + &mask, mergedregs); delay = MAX2(delay, pred_delay); } @@ -377,50 +319,19 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start, } /** - * Calculate delay for post-RA scheduling based on physical registers but not - * exact (i.e. don't recurse into predecessors, and make it possible to - * estimate impact of sync flags). - * - * @soft: If true, add additional delay for situations where they - * would not be strictly required because a sync flag would be - * used (but scheduler would prefer to schedule some other - * instructions first to avoid stalling on sync flag) - * @mergedregs: True if mergedregs is enabled. 
- */ -unsigned -ir3_delay_calc_postra(struct ir3_block *block, struct ir3_instruction *instr, - bool soft, bool mergedregs) -{ - return delay_calc_postra(block, NULL, instr, 0, soft, false, mergedregs); -} - -/** * Calculate delay for nop insertion. This must exactly match hardware * requirements, including recursing into predecessor blocks. */ unsigned -ir3_delay_calc_exact(struct ir3_block *block, struct ir3_instruction *instr, - bool mergedregs) +ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr, + bool mergedregs) { - return delay_calc_postra(block, NULL, instr, 0, false, true, mergedregs); -} - -/** - * Remove nop instructions. The scheduler can insert placeholder nop's - * so that ir3_delay_calc() can account for nop's that won't be needed - * due to nop's triggered by a previous instruction. However, before - * legalize, we want to remove these. The legalize pass can insert - * some nop's if needed to hold (for example) sync flags. This final - * remaining nops are inserted by legalize after this. 
- */ -void -ir3_remove_nops(struct ir3 *ir) -{ - foreach_block (block, &ir->block_list) { - foreach_instr_safe (instr, &block->instr_list) { - if (instr->opc == OPC_NOP) { - list_del(&instr->node); - } - } + regmask_t mask; + regmask_init(&mask, mergedregs); + foreach_src (src, instr) { + if (!(src->flags & (IR3_REG_IMMED | IR3_REG_CONST))) + regmask_set(&mask, src); } + + return delay_calc(block, NULL, instr, 0, &mask, mergedregs); } diff --git a/lib/mesa/src/freedreno/ir3/ir3_lexer.l b/lib/mesa/src/freedreno/ir3/ir3_lexer.l index 2d5582e5b..52b977896 100644 --- a/lib/mesa/src/freedreno/ir3/ir3_lexer.l +++ b/lib/mesa/src/freedreno/ir3/ir3_lexer.l @@ -72,16 +72,6 @@ static int parse_reg(const char *str) return num; } -static int parse_w(const char *str) -{ - str++; - unsigned num = strtol(str, NULL, 10); - if ((num % 32) != 0) - yy_fatal_error("w# must be multiple of 32"); - if (num < 32) - yy_fatal_error("w# must be at least 32"); - return num / 32; -} %} %option noyywrap @@ -139,7 +129,7 @@ static int parse_w(const char *str) "a0.x" return T_A0; "a1.x" return T_A1; "p0."[xyzw] ir3_yylval.num = parse_reg(yytext); return T_P0; -"w"[0-9]+ ir3_yylval.num = parse_w(yytext); return T_W; +"w"[0-9]+ ir3_yylval.num = strtol(yytext+1, NULL, 10); return T_W; "s#"[0-9]+ ir3_yylval.num = strtol(yytext+2, NULL, 10); return T_SAMP; "t#"[0-9]+ ir3_yylval.num = strtol(yytext+2, NULL, 10); return T_TEX; @@ -167,6 +157,7 @@ static int parse_w(const char *str) "stkr" return TOKEN(T_OP_STKR); "xset" return TOKEN(T_OP_XSET); "xclr" return TOKEN(T_OP_XCLR); +"getlast" return TOKEN(T_OP_GETLAST); "getone" return TOKEN(T_OP_GETONE); "dbg" return TOKEN(T_OP_DBG); "shps" return TOKEN(T_OP_SHPS); @@ -228,6 +219,7 @@ static int parse_w(const char *str) "shr.b" return TOKEN(T_OP_SHR_B); "ashr.b" return TOKEN(T_OP_ASHR_B); "bary.f" return TOKEN(T_OP_BARY_F); +"flat.b" return TOKEN(T_OP_FLAT_B); "mgen.b" return TOKEN(T_OP_MGEN_B); "getbit.b" return TOKEN(T_OP_GETBIT_B); "setrm" return 
TOKEN(T_OP_SETRM); @@ -252,7 +244,15 @@ static int parse_w(const char *str) "sel.f32" return TOKEN(T_OP_SEL_F32); "sad.s16" return TOKEN(T_OP_SAD_S16); "sad.s32" return TOKEN(T_OP_SAD_S32); -"shlg.b16" return TOKEN(T_OP_SHLG_B16); +"shrm" return TOKEN(T_OP_SHRM); +"shlm" return TOKEN(T_OP_SHLM); +"shrg" return TOKEN(T_OP_SHRG); +"shlg" return TOKEN(T_OP_SHLG); +"andg" return TOKEN(T_OP_ANDG); +"dp2acc" return TOKEN(T_OP_DP2ACC); +"dp4acc" return TOKEN(T_OP_DP4ACC); +"wmm" return TOKEN(T_OP_WMM); +"wmm.accu" return TOKEN(T_OP_WMM_ACCU); /* category 4: */ "rcp" return TOKEN(T_OP_RCP); @@ -295,6 +295,11 @@ static int parse_w(const char *str) "dsypp.1" return TOKEN(T_OP_DSYPP_1); "rgetpos" return TOKEN(T_OP_RGETPOS); "rgetinfo" return TOKEN(T_OP_RGETINFO); +"brcst.active" return TOKEN(T_OP_BRCST_A); +"quad_shuffle.brcst" return TOKEN(T_OP_QSHUFFLE_BRCST); +"quad_shuffle.horiz" return TOKEN(T_OP_QSHUFFLE_H); +"quad_shuffle.vert" return TOKEN(T_OP_QSHUFFLE_V); +"quad_shuffle.diag" return TOKEN(T_OP_QSHUFFLE_DIAG); /* category 6: */ "ldg" return TOKEN(T_OP_LDG); @@ -338,6 +343,29 @@ static int parse_w(const char *str) "atomic.b.and" return TOKEN(T_OP_ATOMIC_B_AND); "atomic.b.or" return TOKEN(T_OP_ATOMIC_B_OR); "atomic.b.xor" return TOKEN(T_OP_ATOMIC_B_XOR); +"atomic.s.add" return TOKEN(T_OP_ATOMIC_S_ADD); +"atomic.s.sub" return TOKEN(T_OP_ATOMIC_S_SUB); +"atomic.s.xchg" return TOKEN(T_OP_ATOMIC_S_XCHG); +"atomic.s.inc" return TOKEN(T_OP_ATOMIC_S_INC); +"atomic.s.dec" return TOKEN(T_OP_ATOMIC_S_DEC); +"atomic.s.cmpxchg" return TOKEN(T_OP_ATOMIC_S_CMPXCHG); +"atomic.s.min" return TOKEN(T_OP_ATOMIC_S_MIN); +"atomic.s.max" return TOKEN(T_OP_ATOMIC_S_MAX); +"atomic.s.and" return TOKEN(T_OP_ATOMIC_S_AND); +"atomic.s.or" return TOKEN(T_OP_ATOMIC_S_OR); +"atomic.s.xor" return TOKEN(T_OP_ATOMIC_S_XOR); +"atomic.g.add" return TOKEN(T_OP_ATOMIC_G_ADD); +"atomic.g.sub" return TOKEN(T_OP_ATOMIC_G_SUB); +"atomic.g.xchg" return TOKEN(T_OP_ATOMIC_G_XCHG); +"atomic.g.inc" return 
TOKEN(T_OP_ATOMIC_G_INC); +"atomic.g.dec" return TOKEN(T_OP_ATOMIC_G_DEC); +"atomic.g.cmpxchg" return TOKEN(T_OP_ATOMIC_G_CMPXCHG); +"atomic.g.min" return TOKEN(T_OP_ATOMIC_G_MIN); +"atomic.g.max" return TOKEN(T_OP_ATOMIC_G_MAX); +"atomic.g.and" return TOKEN(T_OP_ATOMIC_G_AND); +"atomic.g.or" return TOKEN(T_OP_ATOMIC_G_OR); +"atomic.g.xor" return TOKEN(T_OP_ATOMIC_G_XOR); + "ldgb" return TOKEN(T_OP_LDGB); "stgb" return TOKEN(T_OP_STGB); "stib" return TOKEN(T_OP_STIB); @@ -345,6 +373,8 @@ static int parse_w(const char *str) "ldlv" return TOKEN(T_OP_LDLV); "getspid" return TOKEN(T_OP_GETSPID); "getwid" return TOKEN(T_OP_GETWID); +"getfiberid" return TOKEN(T_OP_GETFIBERID); +"stc" return TOKEN(T_OP_STC); /* category 7: */ "bar" return TOKEN(T_OP_BAR); @@ -362,6 +392,11 @@ static int parse_w(const char *str) "untyped" return TOKEN(T_UNTYPED); "typed" return TOKEN(T_TYPED); +"unsigned" return TOKEN(T_UNSIGNED); +"mixed" return TOKEN(T_MIXED); +"low" return TOKEN(T_LOW); +"high" return TOKEN(T_HIGH); + "1d" return TOKEN(T_1D); "2d" return TOKEN(T_2D); "3d" return TOKEN(T_3D); @@ -379,6 +414,7 @@ static int parse_w(const char *str) "p" return 'p'; "s2en" return TOKEN(T_S2EN); "s" return 's'; +"k" return 'k'; "base"[0-9]+ ir3_yylval.num = strtol(yytext+4, NULL, 10); return T_BASE; "offset"[0-9]+ ir3_yylval.num = strtol(yytext+6, NULL, 10); return T_OFFSET; "uniform" return T_UNIFORM; diff --git a/lib/mesa/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c b/lib/mesa/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c deleted file mode 100644 index 37a3dcb26..000000000 --- a/lib/mesa/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright © 2017 Ilia Mirkin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, 
distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "ir3_nir.h" -#include "compiler/nir/nir_builder.h" - -/* A4XX has a broken GATHER4 operation. It performs the texture swizzle on the - * gather results, rather than before. As a result, it must be emulated with - * direct texture calls. 
- */ - -static bool -lower_tg4(nir_block *block, nir_builder *b, void *mem_ctx) -{ - bool progress = false; - - static const int offsets[3][2] = { {0, 1}, {1, 1}, {1, 0} }; - - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_tex) - continue; - - nir_tex_instr *tg4 = (nir_tex_instr *)instr; - - if (tg4->op != nir_texop_tg4) - continue; - - b->cursor = nir_before_instr(&tg4->instr); - - nir_ssa_def *results[4]; - int offset_index = nir_tex_instr_src_index(tg4, nir_tex_src_offset); - for (int i = 0; i < 4; i++) { - int num_srcs = tg4->num_srcs + 1 /* lod */; - if (offset_index < 0 && i < 3) - num_srcs++; - - nir_tex_instr *tex = nir_tex_instr_create(b->shader, num_srcs); - tex->op = nir_texop_txl; - tex->sampler_dim = tg4->sampler_dim; - tex->coord_components = tg4->coord_components; - tex->is_array = tg4->is_array; - tex->is_shadow = tg4->is_shadow; - tex->is_new_style_shadow = tg4->is_new_style_shadow; - tex->texture_index = tg4->texture_index; - tex->sampler_index = tg4->sampler_index; - tex->dest_type = tg4->dest_type; - - for (int j = 0; j < tg4->num_srcs; j++) { - nir_src_copy(&tex->src[j].src, &tg4->src[j].src, tex); - tex->src[j].src_type = tg4->src[j].src_type; - } - if (i != 3) { - nir_ssa_def *offset = - nir_vec2(b, nir_imm_int(b, offsets[i][0]), - nir_imm_int(b, offsets[i][1])); - if (offset_index < 0) { - tex->src[tg4->num_srcs].src = nir_src_for_ssa(offset); - tex->src[tg4->num_srcs].src_type = nir_tex_src_offset; - } else { - assert(nir_tex_instr_src_size(tex, offset_index) == 2); - nir_ssa_def *orig = nir_ssa_for_src( - b, tex->src[offset_index].src, 2); - tex->src[offset_index].src = - nir_src_for_ssa(nir_iadd(b, orig, offset)); - } - } - tex->src[num_srcs - 1].src = nir_src_for_ssa(nir_imm_float(b, 0)); - tex->src[num_srcs - 1].src_type = nir_tex_src_lod; - - nir_ssa_dest_init(&tex->instr, &tex->dest, - nir_tex_instr_dest_size(tex), 32, NULL); - nir_builder_instr_insert(b, &tex->instr); - - results[i] = nir_channel(b, 
&tex->dest.ssa, tg4->component); - } - - nir_ssa_def *result = nir_vec4(b, results[0], results[1], results[2], results[3]); - nir_ssa_def_rewrite_uses(&tg4->dest.ssa, nir_src_for_ssa(result)); - - nir_instr_remove(&tg4->instr); - - progress = true; - } - - return progress; -} - -static bool -lower_tg4_func(nir_function_impl *impl) -{ - void *mem_ctx = ralloc_parent(impl); - nir_builder b; - nir_builder_init(&b, impl); - - bool progress = false; - nir_foreach_block_safe(block, impl) { - progress |= lower_tg4(block, &b, mem_ctx); - } - - if (progress) - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - - return progress; -} - -bool -ir3_nir_lower_tg4_to_tex(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(function, shader) { - if (function->impl) - progress |= lower_tg4_func(function->impl); - } - - return progress; -} diff --git a/lib/mesa/src/freedreno/ir3/ir3_parser.y b/lib/mesa/src/freedreno/ir3/ir3_parser.y index acd94b35a..fd29c639d 100644 --- a/lib/mesa/src/freedreno/ir3/ir3_parser.y +++ b/lib/mesa/src/freedreno/ir3/ir3_parser.y @@ -399,6 +399,7 @@ static void print_token(FILE *file, int type, YYSTYPE value) %token <tok> T_OP_STKR %token <tok> T_OP_XSET %token <tok> T_OP_XCLR +%token <tok> T_OP_GETLAST %token <tok> T_OP_GETONE %token <tok> T_OP_DBG %token <tok> T_OP_SHPS @@ -458,6 +459,7 @@ static void print_token(FILE *file, int type, YYSTYPE value) %token <tok> T_OP_SHR_B %token <tok> T_OP_ASHR_B %token <tok> T_OP_BARY_F +%token <tok> T_OP_FLAT_B %token <tok> T_OP_MGEN_B %token <tok> T_OP_GETBIT_B %token <tok> T_OP_SETRM @@ -482,7 +484,15 @@ static void print_token(FILE *file, int type, YYSTYPE value) %token <tok> T_OP_SEL_F32 %token <tok> T_OP_SAD_S16 %token <tok> T_OP_SAD_S32 -%token <tok> T_OP_SHLG_B16 +%token <tok> T_OP_SHRM +%token <tok> T_OP_SHLM +%token <tok> T_OP_SHRG +%token <tok> T_OP_SHLG +%token <tok> T_OP_ANDG +%token <tok> T_OP_DP2ACC +%token <tok> T_OP_DP4ACC +%token <tok> T_OP_WMM 
+%token <tok> T_OP_WMM_ACCU /* category 4: */ %token <tok> T_OP_RCP @@ -525,6 +535,11 @@ static void print_token(FILE *file, int type, YYSTYPE value) %token <tok> T_OP_DSYPP_1 %token <tok> T_OP_RGETPOS %token <tok> T_OP_RGETINFO +%token <tok> T_OP_BRCST_A +%token <tok> T_OP_QSHUFFLE_BRCST +%token <tok> T_OP_QSHUFFLE_H +%token <tok> T_OP_QSHUFFLE_V +%token <tok> T_OP_QSHUFFLE_DIAG /* category 6: */ %token <tok> T_OP_LDG @@ -568,6 +583,28 @@ static void print_token(FILE *file, int type, YYSTYPE value) %token <tok> T_OP_ATOMIC_B_AND %token <tok> T_OP_ATOMIC_B_OR %token <tok> T_OP_ATOMIC_B_XOR +%token <tok> T_OP_ATOMIC_S_ADD +%token <tok> T_OP_ATOMIC_S_SUB +%token <tok> T_OP_ATOMIC_S_XCHG +%token <tok> T_OP_ATOMIC_S_INC +%token <tok> T_OP_ATOMIC_S_DEC +%token <tok> T_OP_ATOMIC_S_CMPXCHG +%token <tok> T_OP_ATOMIC_S_MIN +%token <tok> T_OP_ATOMIC_S_MAX +%token <tok> T_OP_ATOMIC_S_AND +%token <tok> T_OP_ATOMIC_S_OR +%token <tok> T_OP_ATOMIC_S_XOR +%token <tok> T_OP_ATOMIC_G_ADD +%token <tok> T_OP_ATOMIC_G_SUB +%token <tok> T_OP_ATOMIC_G_XCHG +%token <tok> T_OP_ATOMIC_G_INC +%token <tok> T_OP_ATOMIC_G_DEC +%token <tok> T_OP_ATOMIC_G_CMPXCHG +%token <tok> T_OP_ATOMIC_G_MIN +%token <tok> T_OP_ATOMIC_G_MAX +%token <tok> T_OP_ATOMIC_G_AND +%token <tok> T_OP_ATOMIC_G_OR +%token <tok> T_OP_ATOMIC_G_XOR %token <tok> T_OP_LDGB %token <tok> T_OP_STGB %token <tok> T_OP_STIB @@ -575,6 +612,8 @@ static void print_token(FILE *file, int type, YYSTYPE value) %token <tok> T_OP_LDLV %token <tok> T_OP_GETSPID %token <tok> T_OP_GETWID +%token <tok> T_OP_GETFIBERID +%token <tok> T_OP_STC /* category 7: */ %token <tok> T_OP_BAR @@ -593,6 +632,11 @@ static void print_token(FILE *file, int type, YYSTYPE value) %token <tok> T_UNTYPED %token <tok> T_TYPED +%token <tok> T_MIXED +%token <tok> T_UNSIGNED +%token <tok> T_LOW +%token <tok> T_HIGH + %token <tok> T_1D %token <tok> T_2D %token <tok> T_3D @@ -746,7 +790,7 @@ iflag: T_SY { iflags.flags |= IR3_INSTR_SY; } iflags: | iflag iflags -instrs: instr 
instrs +instrs: instrs instr | instr instr: iflags cat0_instr @@ -800,6 +844,7 @@ cat0_instr: T_OP_NOP { new_instr(OPC_NOP); } | T_OP_PREDT { new_instr(OPC_PREDT); } cat0_src1 | T_OP_PREDF { new_instr(OPC_PREDF); } cat0_src1 | T_OP_PREDE { new_instr(OPC_PREDE); } +| T_OP_GETLAST '.' T_W { new_instr(OPC_GETLAST); } cat0_immed cat1_opc: T_OP_MOV '.' T_CAT1_TYPE_TYPE { parse_type_type(new_instr(OPC_MOV), $3); @@ -815,9 +860,16 @@ cat1_movmsk: T_OP_MOVMSK '.' T_W { new_instr(OPC_MOVMSK); instr->cat1.src_type = TYPE_U32; instr->cat1.dst_type = TYPE_U32; - instr->repeat = $3 - 1; } dst_reg { - instr->dsts[0]->wrmask = (1 << $3) - 1; + if (($3 % 32) != 0) + yyerror("w# must be multiple of 32"); + if ($3 < 32) + yyerror("w# must be at least 32"); + + int num = $3 / 32; + + instr->repeat = num - 1; + instr->dsts[0]->wrmask = (1 << num) - 1; } cat1_mova1: T_OP_MOVA1 T_A1 ',' { @@ -894,6 +946,7 @@ cat2_opc_2src: T_OP_ADD_F { new_instr(OPC_ADD_F); } | T_OP_SHR_B { new_instr(OPC_SHR_B); } | T_OP_ASHR_B { new_instr(OPC_ASHR_B); } | T_OP_BARY_F { new_instr(OPC_BARY_F); } +| T_OP_FLAT_B { new_instr(OPC_FLAT_B); } | T_OP_MGEN_B { new_instr(OPC_MGEN_B); } | T_OP_GETBIT_B { new_instr(OPC_GETBIT_B); } | T_OP_SHB { new_instr(OPC_SHB); } @@ -910,6 +963,12 @@ cat2_instr: cat2_opc_1src dst_reg ',' src_reg_or_const_or_rel_or_imm | cat2_opc_2src_cnd '.' cond dst_reg ',' src_reg_or_const_or_rel_or_imm ',' src_reg_or_const_or_rel_or_imm | cat2_opc_2src dst_reg ',' src_reg_or_const_or_rel_or_imm ',' src_reg_or_const_or_rel_or_imm +cat3_dp_signedness:'.' T_MIXED { instr->cat3.signedness = IR3_SRC_MIXED; } +| '.' T_UNSIGNED{ instr->cat3.signedness = IR3_SRC_UNSIGNED; } + +cat3_dp_pack: '.' T_LOW { instr->cat3.packed = IR3_SRC_PACKED_LOW; } +| '.' 
T_HIGH { instr->cat3.packed = IR3_SRC_PACKED_HIGH; } + cat3_opc: T_OP_MAD_U16 { new_instr(OPC_MAD_U16); } | T_OP_MADSH_U16 { new_instr(OPC_MADSH_U16); } | T_OP_MAD_S16 { new_instr(OPC_MAD_S16); } @@ -927,8 +986,22 @@ cat3_opc: T_OP_MAD_U16 { new_instr(OPC_MAD_U16); } | T_OP_SAD_S16 { new_instr(OPC_SAD_S16); } | T_OP_SAD_S32 { new_instr(OPC_SAD_S32); } +cat3_imm_reg_opc: T_OP_SHRM { new_instr(OPC_SHRM); } +| T_OP_SHLM { new_instr(OPC_SHLM); } +| T_OP_SHRG { new_instr(OPC_SHRG); } +| T_OP_SHLG { new_instr(OPC_SHLG); } +| T_OP_ANDG { new_instr(OPC_ANDG); } + +cat3_wmm: T_OP_WMM { new_instr(OPC_WMM); } +| T_OP_WMM_ACCU { new_instr(OPC_WMM_ACCU); } + +cat3_dp: T_OP_DP2ACC { new_instr(OPC_DP2ACC); } +| T_OP_DP4ACC { new_instr(OPC_DP4ACC); } + cat3_instr: cat3_opc dst_reg ',' src_reg_or_const_or_rel ',' src_reg_or_const ',' src_reg_or_const_or_rel -| T_OP_SHLG_B16 { new_instr(OPC_SHLG_B16); } dst_reg ',' src_reg_or_rel_or_imm ',' src_reg_or_const ',' src_reg_or_rel_or_imm +| cat3_imm_reg_opc dst_reg ',' src_reg_or_rel_or_imm ',' src_reg_or_const ',' src_reg_or_rel_or_imm +| cat3_wmm dst_reg ',' src_reg_gpr ',' src_reg ',' immediate +| cat3_dp cat3_dp_signedness cat3_dp_pack dst_reg ',' src_reg_or_rel_or_imm ',' src_reg_or_const ',' src_reg_or_rel_or_imm cat4_opc: T_OP_RCP { new_instr(OPC_RCP); } | T_OP_RSQ { new_instr(OPC_RSQ); } @@ -972,6 +1045,11 @@ cat5_opc: T_OP_ISAM { new_instr(OPC_ISAM); } | T_OP_SAMGP3 { new_instr(OPC_SAMGP3); } | T_OP_RGETPOS { new_instr(OPC_RGETPOS); } | T_OP_RGETINFO { new_instr(OPC_RGETINFO); } +| T_OP_BRCST_A { new_instr(OPC_BRCST_ACTIVE); } +| T_OP_QSHUFFLE_BRCST { new_instr(OPC_QUAD_SHUFFLE_BRCST); } +| T_OP_QSHUFFLE_H { new_instr(OPC_QUAD_SHUFFLE_HORIZ); } +| T_OP_QSHUFFLE_V { new_instr(OPC_QUAD_SHUFFLE_VERT); } +| T_OP_QSHUFFLE_DIAG { new_instr(OPC_QUAD_SHUFFLE_DIAG); } cat5_flag: '.' T_3D { instr->flags |= IR3_INSTR_3D; } | '.' 'a' { instr->flags |= IR3_INSTR_A; } @@ -979,13 +1057,15 @@ cat5_flag: '.' 
T_3D { instr->flags |= IR3_INSTR_3D; } | '.' 'p' { instr->flags |= IR3_INSTR_P; } | '.' 's' { instr->flags |= IR3_INSTR_S; } | '.' T_S2EN { instr->flags |= IR3_INSTR_S2EN; } +| '.' T_UNIFORM { } | '.' T_NONUNIFORM { instr->flags |= IR3_INSTR_NONUNIF; } | '.' T_BASE { instr->flags |= IR3_INSTR_B; instr->cat5.tex_base = $2; } +| '.' T_W { instr->cat5.cluster_size = $2; } cat5_flags: | cat5_flag cat5_flags cat5_samp: T_SAMP { instr->cat5.samp = $1; } -cat5_tex: T_TEX { if (instr->flags & IR3_INSTR_B) instr->cat5.samp |= ($1 << 4); else instr->cat5.tex = $1; } +cat5_tex: T_TEX { instr->cat5.tex = $1; } cat5_type: '(' type ')' { instr->cat5.type = $2; } cat5_a1: src_reg { instr->flags |= IR3_INSTR_A1EN; } @@ -1018,7 +1098,7 @@ cat6_imm_offset: offset { new_src(0, IR3_REG_IMMED)->iim_val = $1; } cat6_offset: cat6_imm_offset | '+' src cat6_dst_offset: offset { instr->cat6.dst_offset = $1; } -| '+' src { instr->flags |= IR3_INSTR_G; } +| '+' src cat6_immed: integer { instr->cat6.iim_val = $1; } @@ -1066,14 +1146,39 @@ cat6_atomic_opc: T_OP_ATOMIC_ADD { new_instr(OPC_ATOMIC_ADD); } | T_OP_ATOMIC_OR { new_instr(OPC_ATOMIC_OR); } | T_OP_ATOMIC_XOR { new_instr(OPC_ATOMIC_XOR); } -cat6_atomic_g: cat6_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 
'g' dst_reg ',' 'g' '[' cat6_reg_or_immed ']' ',' src ',' src ',' src { - instr->flags |= IR3_INSTR_G; - } +cat6_a3xx_atomic_opc: T_OP_ATOMIC_S_ADD { new_instr(OPC_ATOMIC_S_ADD); } +| T_OP_ATOMIC_S_SUB { new_instr(OPC_ATOMIC_S_SUB); } +| T_OP_ATOMIC_S_XCHG { new_instr(OPC_ATOMIC_S_XCHG); } +| T_OP_ATOMIC_S_INC { new_instr(OPC_ATOMIC_S_INC); } +| T_OP_ATOMIC_S_DEC { new_instr(OPC_ATOMIC_S_DEC); } +| T_OP_ATOMIC_S_CMPXCHG { new_instr(OPC_ATOMIC_S_CMPXCHG); } +| T_OP_ATOMIC_S_MIN { new_instr(OPC_ATOMIC_S_MIN); } +| T_OP_ATOMIC_S_MAX { new_instr(OPC_ATOMIC_S_MAX); } +| T_OP_ATOMIC_S_AND { new_instr(OPC_ATOMIC_S_AND); } +| T_OP_ATOMIC_S_OR { new_instr(OPC_ATOMIC_S_OR); } +| T_OP_ATOMIC_S_XOR { new_instr(OPC_ATOMIC_S_XOR); } + +cat6_a6xx_atomic_opc: T_OP_ATOMIC_G_ADD { new_instr(OPC_ATOMIC_G_ADD); } +| T_OP_ATOMIC_G_SUB { new_instr(OPC_ATOMIC_G_SUB); } +| T_OP_ATOMIC_G_XCHG { new_instr(OPC_ATOMIC_G_XCHG); } +| T_OP_ATOMIC_G_INC { new_instr(OPC_ATOMIC_G_INC); } +| T_OP_ATOMIC_G_DEC { new_instr(OPC_ATOMIC_G_DEC); } +| T_OP_ATOMIC_G_CMPXCHG { new_instr(OPC_ATOMIC_G_CMPXCHG); } +| T_OP_ATOMIC_G_MIN { new_instr(OPC_ATOMIC_G_MIN); } +| T_OP_ATOMIC_G_MAX { new_instr(OPC_ATOMIC_G_MAX); } +| T_OP_ATOMIC_G_AND { new_instr(OPC_ATOMIC_G_AND); } +| T_OP_ATOMIC_G_OR { new_instr(OPC_ATOMIC_G_OR); } +| T_OP_ATOMIC_G_XOR { new_instr(OPC_ATOMIC_G_XOR); } + +cat6_a3xx_atomic_s: cat6_a3xx_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' 'g' '[' cat6_reg_or_immed ']' ',' src ',' src ',' src + +cat6_a6xx_atomic_g: cat6_a6xx_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' src ',' src cat6_atomic_l: cat6_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 
'l' dst_reg ',' 'l' '[' cat6_reg_or_immed ']' ',' src -cat6_atomic: cat6_atomic_g -| cat6_atomic_l +cat6_atomic: cat6_atomic_l +| cat6_a3xx_atomic_s +| cat6_a6xx_atomic_g cat6_ibo_opc_1src: T_OP_RESINFO { new_instr(OPC_RESINFO); } @@ -1087,6 +1192,7 @@ cat6_ibo: cat6_ibo_opc_1src cat6_type cat6_dim dst_reg ',' 'g' '[' cat6 cat6_id_opc: T_OP_GETSPID { new_instr(OPC_GETSPID); } | T_OP_GETWID { new_instr(OPC_GETWID); } +| T_OP_GETFIBERID { new_instr(OPC_GETFIBERID); } cat6_id: cat6_id_opc cat6_type dst_reg @@ -1102,17 +1208,17 @@ cat6_reg_or_immed: src cat6_bindless_ibo_opc_1src: T_OP_RESINFO_B { new_instr(OPC_RESINFO); } -cat6_bindless_ibo_opc_2src: T_OP_ATOMIC_B_ADD { new_instr(OPC_ATOMIC_ADD)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_SUB { new_instr(OPC_ATOMIC_SUB)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_XCHG { new_instr(OPC_ATOMIC_XCHG)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_INC { new_instr(OPC_ATOMIC_INC)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_DEC { new_instr(OPC_ATOMIC_DEC)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_CMPXCHG { new_instr(OPC_ATOMIC_CMPXCHG)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_MIN { new_instr(OPC_ATOMIC_MIN)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_MAX { new_instr(OPC_ATOMIC_MAX)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_AND { new_instr(OPC_ATOMIC_AND)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_OR { new_instr(OPC_ATOMIC_OR)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_XOR { new_instr(OPC_ATOMIC_XOR)->flags |= IR3_INSTR_G; dummy_dst(); } +cat6_bindless_ibo_opc_2src: T_OP_ATOMIC_B_ADD { new_instr(OPC_ATOMIC_B_ADD); dummy_dst(); } +| T_OP_ATOMIC_B_SUB { new_instr(OPC_ATOMIC_B_SUB); dummy_dst(); } +| T_OP_ATOMIC_B_XCHG { new_instr(OPC_ATOMIC_B_XCHG); dummy_dst(); } +| T_OP_ATOMIC_B_INC { new_instr(OPC_ATOMIC_B_INC); dummy_dst(); } +| T_OP_ATOMIC_B_DEC { new_instr(OPC_ATOMIC_B_DEC); dummy_dst(); } +| 
T_OP_ATOMIC_B_CMPXCHG { new_instr(OPC_ATOMIC_B_CMPXCHG); dummy_dst(); } +| T_OP_ATOMIC_B_MIN { new_instr(OPC_ATOMIC_B_MIN); dummy_dst(); } +| T_OP_ATOMIC_B_MAX { new_instr(OPC_ATOMIC_B_MAX); dummy_dst(); } +| T_OP_ATOMIC_B_AND { new_instr(OPC_ATOMIC_B_AND); dummy_dst(); } +| T_OP_ATOMIC_B_OR { new_instr(OPC_ATOMIC_B_OR); dummy_dst(); } +| T_OP_ATOMIC_B_XOR { new_instr(OPC_ATOMIC_B_XOR); dummy_dst(); } | T_OP_STIB_B { new_instr(OPC_STIB); dummy_dst(); } cat6_bindless_ibo_opc_2src_dst: T_OP_LDIB_B { new_instr(OPC_LDIB); } @@ -1123,13 +1229,23 @@ cat6_bindless_ibo: cat6_bindless_ibo_opc_1src cat6_typed cat6_dim cat6_type '.' cat6_bindless_ldc_opc: T_OP_LDC { new_instr(OPC_LDC); } -cat6_bindless_ldc: cat6_bindless_ldc_opc '.' T_OFFSET '.' cat6_immed '.' cat6_bindless_mode dst_reg ',' cat6_reg_or_immed ',' cat6_reg_or_immed { - instr->cat6.d = $3; +/* This is separated from the opcode to avoid lookahead/shift-reduce conflicts */ +cat6_bindless_ldc_middle: + T_OFFSET '.' cat6_immed '.' cat6_bindless_mode dst_reg { instr->cat6.d = $1; } +| cat6_immed '.' 'k' '.' cat6_bindless_mode 'c' '[' T_A1 ']' { instr->opc = OPC_LDC_K; } + +cat6_bindless_ldc: cat6_bindless_ldc_opc '.' cat6_bindless_ldc_middle ',' cat6_reg_or_immed ',' cat6_reg_or_immed { instr->cat6.type = TYPE_U32; /* TODO cleanup ir3 src order: */ swap(instr->srcs[0], instr->srcs[1]); } +stc_dst: integer { new_src(0, IR3_REG_IMMED)->iim_val = $1; } +| T_A1 { new_src(0, IR3_REG_IMMED)->iim_val = 0; instr->flags |= IR3_INSTR_A1EN; } +| T_A1 '+' integer { new_src(0, IR3_REG_IMMED)->iim_val = $3; instr->flags |= IR3_INSTR_A1EN; } + +cat6_stc: T_OP_STC { new_instr(OPC_STC); } cat6_type 'c' '[' stc_dst ']' ',' src_reg ',' cat6_immed + cat6_todo: T_OP_G2L { new_instr(OPC_G2L); } | T_OP_L2G { new_instr(OPC_L2G); } | T_OP_RESFMT { new_instr(OPC_RESFMT); } @@ -1144,6 +1260,7 @@ cat6_instr: cat6_load | cat6_id | cat6_bindless_ldc | cat6_bindless_ibo +| cat6_stc | cat6_todo cat7_scope: '.' 
'w' { instr->cat7.w = true; } @@ -1195,6 +1312,9 @@ src_reg_flags: src_reg_flag src_reg: src | src_reg_flags src +src_reg_gpr: src_reg +| relative_gpr_src + src_const: const | src_reg_flags const diff --git a/lib/mesa/src/freedreno/ir3/ir3_postsched.c b/lib/mesa/src/freedreno/ir3/ir3_postsched.c index 507302a00..39de84add 100644 --- a/lib/mesa/src/freedreno/ir3/ir3_postsched.c +++ b/lib/mesa/src/freedreno/ir3/ir3_postsched.c @@ -68,8 +68,10 @@ struct ir3_postsched_ctx { struct list_head unscheduled_list; /* unscheduled instructions */ - int sfu_delay; - int tex_delay; + unsigned ip; + + int ss_delay; + int sy_delay; }; struct ir3_postsched_node { @@ -77,7 +79,9 @@ struct ir3_postsched_node { struct ir3_instruction *instr; bool partially_evaluated_path; - bool has_tex_src, has_sfu_src; + unsigned earliest_ip; + + bool has_sy_src, has_ss_src; unsigned delay; unsigned max_delay; @@ -87,17 +91,17 @@ struct ir3_postsched_node { list_for_each_entry (struct ir3_postsched_node, __n, __list, dag.link) static bool -has_tex_src(struct ir3_instruction *instr) +has_sy_src(struct ir3_instruction *instr) { struct ir3_postsched_node *node = instr->data; - return node->has_tex_src; + return node->has_sy_src; } static bool -has_sfu_src(struct ir3_instruction *instr) +has_ss_src(struct ir3_instruction *instr) { struct ir3_postsched_node *node = instr->data; - return node->has_sfu_src; + return node->has_ss_src; } static void @@ -111,28 +115,45 @@ schedule(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr) di(instr, "schedule"); - list_addtail(&instr->node, &instr->block->instr_list); + bool counts_for_delay = is_alu(instr) || is_flow(instr); + + unsigned delay_cycles = counts_for_delay ? 1 + instr->repeat : 0; struct ir3_postsched_node *n = instr->data; + + /* We insert any nop's needed to get to earliest_ip, then advance + * delay_cycles by scheduling the instruction. 
+ */ + ctx->ip = MAX2(ctx->ip, n->earliest_ip) + delay_cycles; + + util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) { + unsigned delay = (unsigned)(uintptr_t)edge->data; + struct ir3_postsched_node *child = + container_of(edge->child, struct ir3_postsched_node, dag); + child->earliest_ip = MAX2(child->earliest_ip, ctx->ip + delay); + } + + list_addtail(&instr->node, &instr->block->instr_list); + dag_prune_head(ctx->dag, &n->dag); if (is_meta(instr) && (instr->opc != OPC_META_TEX_PREFETCH)) return; - if (is_sfu(instr)) { - ctx->sfu_delay = 8; - } else if (has_sfu_src(instr)) { - ctx->sfu_delay = 0; - } else if (ctx->sfu_delay > 0) { - ctx->sfu_delay--; + if (is_ss_producer(instr)) { + ctx->ss_delay = soft_ss_delay(instr); + } else if (has_ss_src(instr)) { + ctx->ss_delay = 0; + } else if (ctx->ss_delay > 0) { + ctx->ss_delay--; } - if (is_tex_or_prefetch(instr)) { - ctx->tex_delay = 10; - } else if (has_tex_src(instr)) { - ctx->tex_delay = 0; - } else if (ctx->tex_delay > 0) { - ctx->tex_delay--; + if (is_sy_producer(instr)) { + ctx->sy_delay = soft_sy_delay(instr, ctx->block->shader); + } else if (has_sy_src(instr)) { + ctx->sy_delay = 0; + } else if (ctx->sy_delay > 0) { + ctx->sy_delay--; } } @@ -154,25 +175,26 @@ dump_state(struct ir3_postsched_ctx *ctx) } } -/* Determine if this is an instruction that we'd prefer not to schedule - * yet, in order to avoid an (ss) sync. This is limited by the sfu_delay - * counter, ie. the more cycles it has been since the last SFU, the less - * costly a sync would be. 
- */ -static bool -would_sync(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr) +static unsigned +node_delay(struct ir3_postsched_ctx *ctx, struct ir3_postsched_node *n) { - if (ctx->sfu_delay) { - if (has_sfu_src(instr)) - return true; - } + return MAX2(n->earliest_ip, ctx->ip) - ctx->ip; +} - if (ctx->tex_delay) { - if (has_tex_src(instr)) - return true; - } +static unsigned +node_delay_soft(struct ir3_postsched_ctx *ctx, struct ir3_postsched_node *n) +{ + unsigned delay = node_delay(ctx, n); + + /* This takes into account that as when we schedule multiple tex or sfu, the + * first user has to wait for all of them to complete. + */ + if (n->has_ss_src) + delay = MAX2(delay, ctx->ss_delay); + if (n->has_sy_src) + delay = MAX2(delay, ctx->sy_delay); - return false; + return delay; } /* find instruction to schedule: */ @@ -215,8 +237,7 @@ choose_instr(struct ir3_postsched_ctx *ctx) /* Next prioritize discards: */ foreach_sched_node (n, &ctx->dag->heads) { - unsigned d = - ir3_delay_calc_postra(ctx->block, n->instr, false, ctx->v->mergedregs); + unsigned d = node_delay(ctx, n); if (d > 0) continue; @@ -235,13 +256,12 @@ choose_instr(struct ir3_postsched_ctx *ctx) /* Next prioritize expensive instructions: */ foreach_sched_node (n, &ctx->dag->heads) { - unsigned d = - ir3_delay_calc_postra(ctx->block, n->instr, false, ctx->v->mergedregs); + unsigned d = node_delay_soft(ctx, n); if (d > 0) continue; - if (!(is_sfu(n->instr) || is_tex(n->instr))) + if (!(is_ss_producer(n->instr) || is_sy_producer(n->instr))) continue; if (!chosen || (chosen->max_delay < n->max_delay)) @@ -249,53 +269,36 @@ choose_instr(struct ir3_postsched_ctx *ctx) } if (chosen) { - di(chosen->instr, "csp: chose (sfu/tex, hard ready)"); + di(chosen->instr, "csp: chose (sfu/tex, soft ready)"); return chosen->instr; } - /* - * Sometimes be better to take a nop, rather than scheduling an - * instruction that would require an (ss) shortly after another - * SFU.. ie. 
if last SFU was just one or two instr ago, and we - * could choose between taking a nop and then scheduling - * something else, vs scheduling the immed avail instruction that - * would require (ss), we are better with the nop. - */ - for (unsigned delay = 0; delay < 4; delay++) { - foreach_sched_node (n, &ctx->dag->heads) { - if (would_sync(ctx, n->instr)) - continue; - - unsigned d = ir3_delay_calc_postra(ctx->block, n->instr, true, - ctx->v->mergedregs); - - if (d > delay) - continue; - - if (!chosen || (chosen->max_delay < n->max_delay)) - chosen = n; - } - - if (chosen) { - di(chosen->instr, "csp: chose (soft ready, delay=%u)", delay); - return chosen->instr; - } - } - /* Next try to find a ready leader w/ soft delay (ie. including extra * delay for things like tex fetch which can be synchronized w/ sync * bit (but we probably do want to schedule some other instructions - * while we wait) + * while we wait). We also allow a small amount of nops, to prefer now-nops + * over future-nops up to a point, as that gives better results. */ + unsigned chosen_delay = 0; foreach_sched_node (n, &ctx->dag->heads) { - unsigned d = - ir3_delay_calc_postra(ctx->block, n->instr, true, ctx->v->mergedregs); + unsigned d = node_delay_soft(ctx, n); - if (d > 0) + if (d > 3) continue; - if (!chosen || (chosen->max_delay < n->max_delay)) + if (!chosen || d < chosen_delay) { + chosen = n; + chosen_delay = d; + continue; + } + + if (d > chosen_delay) + continue; + + if (chosen->max_delay < n->max_delay) { chosen = n; + chosen_delay = d; + } } if (chosen) { @@ -308,8 +311,7 @@ choose_instr(struct ir3_postsched_ctx *ctx) * stalls.. but we've already decided there is not a better option. 
*/ foreach_sched_node (n, &ctx->dag->heads) { - unsigned d = - ir3_delay_calc_postra(ctx->block, n->instr, false, ctx->v->mergedregs); + unsigned d = node_delay(ctx, n); if (d > 0) continue; @@ -324,9 +326,6 @@ choose_instr(struct ir3_postsched_ctx *ctx) } /* Otherwise choose leader with maximum cost: - * - * TODO should we try to balance cost and delays? I guess it is - * a balance between now-nop's and future-nop's? */ foreach_sched_node (n, &ctx->dag->heads) { if (!chosen || chosen->max_delay < n->max_delay) @@ -361,6 +360,7 @@ struct ir3_postsched_deps_state { * for full precision and 2nd half for half-precision. */ struct ir3_postsched_node *regs[2 * 256]; + unsigned dst_n[2 * 256]; }; /* bounds checking read/write accessors, since OoB access to stuff on @@ -374,7 +374,8 @@ struct ir3_postsched_deps_state { static void add_dep(struct ir3_postsched_deps_state *state, - struct ir3_postsched_node *before, struct ir3_postsched_node *after) + struct ir3_postsched_node *before, struct ir3_postsched_node *after, + unsigned d) { if (!before || !after) return; @@ -382,30 +383,36 @@ add_dep(struct ir3_postsched_deps_state *state, assert(before != after); if (state->direction == F) { - dag_add_edge(&before->dag, &after->dag, NULL); + dag_add_edge_max_data(&before->dag, &after->dag, (uintptr_t)d); } else { - dag_add_edge(&after->dag, &before->dag, NULL); + dag_add_edge_max_data(&after->dag, &before->dag, 0); } } static void add_single_reg_dep(struct ir3_postsched_deps_state *state, - struct ir3_postsched_node *node, unsigned num, int src_n) + struct ir3_postsched_node *node, unsigned num, int src_n, + int dst_n) { struct ir3_postsched_node *dep = dep_reg(state, num); + unsigned d = 0; if (src_n >= 0 && dep && state->direction == F) { - unsigned d = ir3_delayslots(dep->instr, node->instr, src_n, true); - node->delay = MAX2(node->delay, d); - if (is_tex_or_prefetch(dep->instr)) - node->has_tex_src = true; - if (is_tex_or_prefetch(dep->instr)) - node->has_sfu_src = true; - } 
- - add_dep(state, dep, node); + /* get the dst_n this corresponds to */ + unsigned dst_n = state->dst_n[num]; + unsigned d_soft = ir3_delayslots(dep->instr, node->instr, src_n, true); + d = ir3_delayslots_with_repeat(dep->instr, node->instr, dst_n, src_n); + node->delay = MAX2(node->delay, d_soft); + if (is_sy_producer(dep->instr)) + node->has_sy_src = true; + if (is_ss_producer(dep->instr)) + node->has_ss_src = true; + } + + add_dep(state, dep, node, d); if (src_n < 0) { dep_reg(state, num) = node; + state->dst_n[num] = dst_n; } } @@ -413,15 +420,15 @@ add_single_reg_dep(struct ir3_postsched_deps_state *state, * between half and full precision that result in additional dependencies. * The 'reg' arg is really just to know half vs full precision. * - * If non-negative, then this adds a dependency on a source register, and + * If src_n is positive, then this adds a dependency on a source register, and * src_n is the index passed into ir3_delayslots() for calculating the delay: - * If positive, corresponds to node->instr->regs[src_n]. If negative, then - * this is for a destination register. + * it corresponds to node->instr->srcs[src_n]. If src_n is negative, then + * this is for the destination register corresponding to dst_n. 
*/ static void add_reg_dep(struct ir3_postsched_deps_state *state, struct ir3_postsched_node *node, const struct ir3_register *reg, - unsigned num, int src_n) + unsigned num, int src_n, int dst_n) { if (state->merged) { /* Make sure that special registers like a0.x that are written as @@ -430,16 +437,16 @@ add_reg_dep(struct ir3_postsched_deps_state *state, */ if ((reg->flags & IR3_REG_HALF) && !is_reg_special(reg)) { /* single conflict in half-reg space: */ - add_single_reg_dep(state, node, num, src_n); + add_single_reg_dep(state, node, num, src_n, dst_n); } else { /* two conflicts in half-reg space: */ - add_single_reg_dep(state, node, 2 * num + 0, src_n); - add_single_reg_dep(state, node, 2 * num + 1, src_n); + add_single_reg_dep(state, node, 2 * num + 0, src_n, dst_n); + add_single_reg_dep(state, node, 2 * num + 1, src_n, dst_n); } } else { if (reg->flags & IR3_REG_HALF) num += ARRAY_SIZE(state->regs) / 2; - add_single_reg_dep(state, node, num, src_n); + add_single_reg_dep(state, node, num, src_n, dst_n); } } @@ -457,12 +464,12 @@ calculate_deps(struct ir3_postsched_deps_state *state, if (reg->flags & IR3_REG_RELATIV) { /* mark entire array as read: */ for (unsigned j = 0; j < reg->size; j++) { - add_reg_dep(state, node, reg, reg->array.base + j, i); + add_reg_dep(state, node, reg, reg->array.base + j, i, -1); } } else { assert(reg->wrmask >= 1); u_foreach_bit (b, reg->wrmask) { - add_reg_dep(state, node, reg, reg->num + b, i); + add_reg_dep(state, node, reg, reg->num + b, i, -1); } } } @@ -470,18 +477,18 @@ calculate_deps(struct ir3_postsched_deps_state *state, /* And then after we update the state for what this instruction * wrote: */ - foreach_dst (reg, node->instr) { + foreach_dst_n (reg, i, node->instr) { if (reg->wrmask == 0) continue; if (reg->flags & IR3_REG_RELATIV) { /* mark the entire array as written: */ - for (unsigned i = 0; i < reg->size; i++) { - add_reg_dep(state, node, reg, reg->array.base + i, -1); + for (unsigned j = 0; j < reg->size; j++) { 
+ add_reg_dep(state, node, reg, reg->array.base + j, -1, i); } } else { assert(reg->wrmask >= 1); u_foreach_bit (b, reg->wrmask) { - add_reg_dep(state, node, reg, reg->num + b, -1); + add_reg_dep(state, node, reg, reg->num + b, -1, i); } } } @@ -593,7 +600,7 @@ sched_dag_init(struct ir3_postsched_ctx *ctx) if (src->block != instr->block) continue; - dag_add_edge(&sn->dag, &n->dag, NULL); + dag_add_edge_max_data(&sn->dag, &n->dag, 0); } if (is_input(instr)) { @@ -602,14 +609,14 @@ sched_dag_init(struct ir3_postsched_ctx *ctx) util_dynarray_foreach (&inputs, struct ir3_instruction *, instrp) { struct ir3_instruction *input = *instrp; struct ir3_postsched_node *in = input->data; - dag_add_edge(&in->dag, &n->dag, NULL); + dag_add_edge_max_data(&in->dag, &n->dag, 0); } util_dynarray_append(&kills, struct ir3_instruction *, instr); } else if (is_tex(instr) || is_mem(instr)) { util_dynarray_foreach (&kills, struct ir3_instruction *, instrp) { struct ir3_instruction *kill = *instrp; struct ir3_postsched_node *kn = kill->data; - dag_add_edge(&kn->dag, &n->dag, NULL); + dag_add_edge_max_data(&kn->dag, &n->dag, 0); } } } @@ -630,8 +637,8 @@ static void sched_block(struct ir3_postsched_ctx *ctx, struct ir3_block *block) { ctx->block = block; - ctx->tex_delay = 0; - ctx->sfu_delay = 0; + ctx->sy_delay = 0; + ctx->ss_delay = 0; /* move all instructions to the unscheduled list, and * empty the block's instruction list (to which we will @@ -677,18 +684,10 @@ sched_block(struct ir3_postsched_ctx *ctx, struct ir3_block *block) while (!list_is_empty(&ctx->unscheduled_list)) { struct ir3_instruction *instr = choose_instr(ctx); - unsigned delay = - ir3_delay_calc_postra(ctx->block, instr, false, ctx->v->mergedregs); + unsigned delay = node_delay(ctx, instr->data); d("delay=%u", delay); - /* and if we run out of instructions that can be scheduled, - * then it is time for nop's: - */ debug_assert(delay <= 6); - while (delay > 0) { - ir3_NOP(block); - delay--; - } schedule(ctx, instr); } 
@@ -750,7 +749,6 @@ ir3_postsched(struct ir3 *ir, struct ir3_shader_variant *v) .v = v, }; - ir3_remove_nops(ir); cleanup_self_movs(ir); foreach_block (block, &ir->block_list) { diff --git a/lib/mesa/src/freedreno/ir3/ir3_ra.h b/lib/mesa/src/freedreno/ir3/ir3_ra.h index 259341eaa..c6837aaae 100644 --- a/lib/mesa/src/freedreno/ir3/ir3_ra.h +++ b/lib/mesa/src/freedreno/ir3/ir3_ra.h @@ -124,7 +124,7 @@ ra_reg_is_dst(const struct ir3_register *reg) if (ra_reg_is_src((__srcreg = (__instr)->srcs[__i]))) #define ra_foreach_dst_n(__dstreg, __n, __instr) \ - foreach_dst_n(__dstreg, __n, instr) \ + foreach_dst_n(__dstreg, __n, __instr) \ if (ra_reg_is_dst(__dstreg)) #define ra_foreach_dst(__dstreg, __instr) \ diff --git a/lib/mesa/src/freedreno/ir3/tests/disasm.c b/lib/mesa/src/freedreno/ir3/tests/disasm.c index 542469aa1..2f1b89f0d 100644 --- a/lib/mesa/src/freedreno/ir3/tests/disasm.c +++ b/lib/mesa/src/freedreno/ir3/tests/disasm.c @@ -43,6 +43,8 @@ #include "isa/isa.h" /* clang-format off */ +/* Note: @anholt's 4xx disasm was done on an a418 Nexus 5x */ +#define INSTR_4XX(i, d, ...) { .gpu_id = 420, .instr = #i, .expected = d, __VA_ARGS__ } #define INSTR_5XX(i, d, ...) { .gpu_id = 540, .instr = #i, .expected = d, __VA_ARGS__ } #define INSTR_6XX(i, d, ...) 
{ .gpu_id = 630, .instr = #i, .expected = d, __VA_ARGS__ } /* clang-format on */ @@ -58,153 +60,185 @@ static const struct test { bool parse_fail; } tests[] = { /* clang-format off */ - /* cat0 */ - INSTR_6XX(00000000_00000000, "nop"), - INSTR_6XX(00000200_00000000, "(rpt2)nop"), - INSTR_6XX(03000000_00000000, "end"), - INSTR_6XX(00800000_00000004, "br p0.x, #4"), - INSTR_6XX(00900000_00000003, "br !p0.x, #3"), - INSTR_6XX(03820000_00000015, "shps #21"), /* emit */ - INSTR_6XX(04021000_00000000, "(ss)shpe"), /* cut */ - INSTR_6XX(02820000_00000014, "getone #20"), /* kill p0.x */ - INSTR_6XX(00906020_00000007, "brao !p0.x, !p0.y, #7"), - INSTR_6XX(00804040_00000003, "braa p0.x, p0.y, #3"), - INSTR_6XX(07820000_00000000, "prede"), - INSTR_6XX(00800063_0000001e, "brac.3 #30"), - INSTR_6XX(06820000_00000000, "predt p0.x"), - INSTR_6XX(07020000_00000000, "predf p0.x"), - INSTR_6XX(07820000_00000000, "prede"), - - /* cat1 */ - INSTR_6XX(20244000_00000020, "mov.f32f32 r0.x, c8.x"), - INSTR_6XX(20200000_00000020, "mov.f16f16 hr0.x, hc8.x"), - INSTR_6XX(20150000_00000000, "cov.s32s16 hr0.x, r0.x"), - INSTR_6XX(20156004_00000c11, "(ul)mov.s32s32 r1.x, c<a0.x + 17>"), - INSTR_6XX(201100f4_00000000, "mova a0.x, hr0.x"), - INSTR_6XX(20244905_00000410, "(rpt1)mov.f32f32 r1.y, (r)c260.x"), - INSTR_6XX(20174004_00000008, "mov.s32s32 r<a0.x + 4>, r2.x"), - INSTR_6XX(20130000_00000005, "mov.s16s16 hr<a0.x>, hr1.y"), - INSTR_6XX(20110004_00000800, "mov.s16s16 hr1.x, hr<a0.x>"), - /* dEQP-VK.subgroups.ballot.compute.compute */ - INSTR_6XX(260cc3c0_00000000, "movmsk.w128 r48.x"), /* movmsk.w128 sr48.x */ - - INSTR_6XX(240cc004_00030201, "swz.u32u32 r1.x, r0.w, r0.y, r0.z"), - INSTR_6XX(2400c105_04030201, "gat.f16u32 r1.y, hr0.y, hr0.z, hr0.w, hr1.x"), - INSTR_6XX(240c0205_04030201, "sct.u32f16 hr1.y, hr0.z, hr0.w, hr1.x, r0.y"), - INSTR_6XX(2400c205_04030201, "sct.f16u32 r1.y, r0.z, r0.w, r1.x, hr0.y"), - - INSTR_6XX(20510005_0000ffff, "mov.s16s16 hr1.y, -1"), - 
INSTR_6XX(20400005_00003900, "mov.f16f16 hr1.y, h(0.625000)"), - INSTR_6XX(20400006_00003800, "mov.f16f16 hr1.z, h(0.500000)"), - INSTR_6XX(204880f5_00000000, "mova1 a1.x, 0"), - - /* cat2 */ - INSTR_6XX(40104002_0c210001, "add.f hr0.z, r0.y, c<a0.x + 33>"), - INSTR_6XX(40b80804_10408004, "(nop3) cmps.f.lt r1.x, (abs)r1.x, c16.x"), - INSTR_6XX(47308a02_00002000, "(rpt2)bary.f (ei)r0.z, (r)0, r0.x"), - INSTR_6XX(43480801_00008001, "(nop3) absneg.s hr0.y, (abs)hr0.y"), - INSTR_6XX(50600004_2c010004, "(sy)mul.f hr1.x, hr1.x, h(0.5)"), - INSTR_6XX(42280807_27ff0000, "(nop3) add.s hr1.w, hr0.x, h(-1)"), - INSTR_6XX(40a500f8_2c000004, "cmps.f.ne p0.x, hr1.x, h(0.0)"), - INSTR_6XX(438000f8_20010009, "and.b p0.x, hr2.y, h(1)"), - INSTR_6XX(438000f9_00020001, "and.b p0.y, hr0.y, hr0.z"), - INSTR_6XX(40080902_50200006, "(rpt1)add.f hr0.z, (r)hr1.z, (neg)(r)hc8.x"), - INSTR_6XX(42380c01_00040001, "(sat)(nop3) add.s r0.y, r0.y, r1.x"), - INSTR_6XX(42480000_48801086, "(nop2) sub.u hr0.x, hc33.z, (neg)hr<a0.x + 128>"), - INSTR_6XX(46b00001_00001020, "clz.b r0.y, c8.x"), - INSTR_6XX(46700009_00000009, "bfrev.b r2.y, r2.y"), - - /* cat3 */ - INSTR_6XX(66000000_10421041, "sel.f16 hr0.x, hc16.y, hr0.x, hc16.z"), - INSTR_6XX(64848109_109a9099, "(rpt1)sel.b32 r2.y, c38.y, (r)r2.y, c38.z"), - INSTR_6XX(64810904_30521036, "(rpt1)sel.b32 r1.x, (r)c13.z, r0.z, (r)c20.z"), - INSTR_6XX(64818902_20041032, "(rpt1)sel.b32 r0.z, (r)c12.z, r0.w, (r)r1.x"), - INSTR_6XX(63820005_10315030, "mad.f32 r1.y, (neg)c12.x, r1.x, c12.y"), - INSTR_6XX(62050009_00091000, "mad.u24 r2.y, c0.x, r2.z, r2.y"), - INSTR_6XX(61828008_00081033, "madsh.m16 r2.x, c12.w, r1.y, r2.x"), - INSTR_6XX(65900820_100cb008, "(nop3) shlg.b16 hr8.x, 8, hr8.x, 12"), /* (nop3) shlg.b16 hr8.x, (r)8, (r)hr8.x, 12; */ - INSTR_6XX(65ae085c_0002a001, "(nop3) shlg.b16 hr23.x, hr0.y, hr23.x, hr0.z"), /* not seen in blob */ - INSTR_6XX(65900820_0c0aac05, "(nop3) shlg.b16 hr8.x, hc<a0.x + 5>, hr8.x, hc<a0.x + 10>"), /* not seen in blob */ - 
- /* cat4 */ - INSTR_6XX(8010000a_00000003, "rcp r2.z, r0.w"), - - /* cat5 */ - /* dEQP-VK.glsl.derivate.dfdx.uniform_if.float_mediump */ - INSTR_6XX(a3801102_00000001, "dsx (f32)(x)r0.z, r0.x"), /* dsx (f32)(xOOO)r0.z, r0.x */ - /* dEQP-VK.glsl.derivate.dfdy.uniform_if.float_mediump */ - INSTR_6XX(a3c01102_00000001, "dsy (f32)(x)r0.z, r0.x"), /* dsy (f32)(xOOO)r0.z, r0.x */ - /* dEQP-VK.glsl.derivate.dfdxfine.uniform_loop.float_highp */ - INSTR_6XX(a6001105_00000001, "dsxpp.1 (x)r1.y, r0.x"), /* dsxpp.1 (xOOO)r1.y, r0.x */ - INSTR_6XX(a6201105_00000001, "dsxpp.1.p (x)r1.y, r0.x"), /* dsxpp.1 (xOOO)r1.y, r0.x */ - - INSTR_6XX(a2802f00_00000001, "getsize (u16)(xyzw)hr0.x, r0.x, t#0"), - INSTR_6XX(a0c89f04_c4600005, "sam.base1 (f32)(xyzw)r1.x, r0.z, s#3, t#2"), /* sam.s2en.mode6.base1 (f32)(xyzw)r1.x, r0.z, 35 */ - INSTR_6XX(a1c85f00_c0200005, "getlod.base0 (s32)(xyzw)r0.x, r0.z, s#1, t#0"), /* getlod.s2en.mode6.base0 (s32)(xyzw)r0.x, r0.z, 1 */ - INSTR_6XX(a1000f00_00000004, "samb (f16)(xyzw)hr0.x, hr0.z, hr0.x, s#0, t#0"), - INSTR_6XX(a1000f00_00000003, "samb (f16)(xyzw)hr0.x, r0.y, r0.x, s#0, t#0"), - INSTR_6XX(a0c00f00_04400002, "sam (f16)(xyzw)hr0.x, hr0.y, s#2, t#2"), - INSTR_6XX(a6c02f00_00000000, "rgetinfo (u16)(xyzw)hr0.x"), - INSTR_6XX(a3482f08_c0000000, "getinfo.base0 (u16)(xyzw)hr2.x, t#0"), - /* dEQP-GLES31.functional.texture.texture_buffer.render.as_fragment_texture.buffer_size_65536 */ - INSTR_5XX(a2c03102_00000000, "getbuf (u32)(x)r0.z, t#0"), - INSTR_6XX(a0c81f00_e0200005, "sam.base0 (f32)(xyzw)r0.x, r0.z, s#1, a1.x"), - - - /* cat6 */ - - INSTR_5XX(c6e60000_00010600, "ldgb.untyped.4d.u32.1 r0.x, g[0], r1.x, r0.x"), /* ldgb.a.untyped.1dtype.u32.1 r0.x, g[r1.x], r0.x, 0 */ - INSTR_5XX(d7660204_02000a01, "(sy)stib.typed.2d.u32.1 g[1], r0.x, r0.z, r1.x"), /* (sy)stib.a.u32.2d.1 g[r1.x], r0.x, r0.z, 1. 
r1.x is offset in ibo, r0.x is value*/ - /* dEQP-VK.image.load_store.1d_array.r8g8b8a8_unorm */ - INSTR_5XX(c1a20006_0600ba01, "ldib.typed.2d.f32.4 r1.z, g[0], r0.z, r1.z"), /* ldib.a.f32.2d.4 r1.z, g[r0.z], r1.z, 0. r0.z is offset in ibo as src. r1.z */ - /* dEQP-VK.image.load_store.3d.r32g32b32a32_sint */ - INSTR_5XX(c1aa0003_0500fc01, "ldib.typed.3d.s32.4 r0.w, g[0], r0.w, r1.y"), /* ldib.a.s32.3d.4 r0.w, g[r0.w], r1.y, 0. r0.w is offset in ibo as src, and dst */ - /* dEQP-VK.binding_model.shader_access.primary_cmd_buf.storage_image.vertex.descriptor_array.3d */ - INSTR_5XX(c1a20204_0401fc01, "ldib.typed.3d.f32.4 r1.x, g[1], r1.w, r1.x"), /* ldib.a.f32.3d.4 r1.x, g[r1.w], r1.x, 1 */ - /* dEQP-VK.binding_model.shader_access.secondary_cmd_buf.with_push.storage_texel_buffer.vertex_fragment.single_descriptor.offset_zero */ - INSTR_5XX(c1a20005_0501be01, "ldib.typed.4d.f32.4 r1.y, g[0], r1.z, r1.y"), /* ldib.a.f32.1dtype.4 r1.y, g[r1.z], r1.y, 0 */ - /* dEQP-VK.texture.filtering.cube.formats.r8g8b8a8_snorm_nearest */ - INSTR_5XX(c1a60200_0000ba01, "ldib.typed.2d.u32.4 r0.x, g[1], r0.z, r0.x"), /* ldib.a.u32.2d.4 r0.x, g[r0.z], r0.x, 1 */ - - // TODO is this a real instruction? Or float -6.0 ? 
- // INSTR_6XX(c0c00000_00000000, "stg.f16 g[hr0.x], hr0.x, hr0.x", .parse_fail=true), - /* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */ - INSTR_6XX(c0d20906_02800004, "stg.a.f32 g[r1.x+(r1.z)<<2], r0.z, 2"), /* stg.a.f32 g[r1.x+(r1.z<<2)], r0.z, 2 */ - INSTR_6XX(c0da052e_01800042, "stg.a.s32 g[r0.z+(r11.z)<<2], r8.y, 1"), /* stg.a.s32 g[r0.z+(r11.z<<2)], r8.y, 1 */ - INSTR_6XX(c0ca0505_03800042, "stg.s32 g[r0.z+5], r8.y, 3"), - INSTR_6XX(c0ca0500_03800042, "stg.s32 g[r0.z], r8.y, 3"), - INSTR_6XX(c0ca0531_03800242, "stg.s32 g[r0.z+305], r8.y, 3"), - - /* Customely crafted */ - INSTR_6XX(c0d61104_01800228, "stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1"), - INSTR_6XX(c0d61104_01802628, "stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1"), - - INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */ - INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */ - INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"), - INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"), - INSTR_6XX(c0060003_0180c269, "ldg.u32 r0.w, g[r0.w+308], 1"), - - /* Found in TCS/TES shaders of GTA V */ - INSTR_6XX(c0020007_03c1420f, "ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3"), /* ldg.a.f32 r1.w, g[r1.y+((r1.w+1)<<2)], 3 */ - - /* Customely crafted */ - INSTR_6XX(c0020007_03c1740f, "ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3"), - - INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */ - INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */ - INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"), - INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"), - - /* dEQP-GLES3.functional.ubo.random.basic_arrays.0 */ - INSTR_6XX(c7020020_01800000, "stc c[32], r0.x, 1", .parse_fail=true), - /* 
dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */ - INSTR_6XX(c7060020_03800000, "stc c[32], r0.x, 3", .parse_fail=true), - - /* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */ - INSTR_6XX(c0260200_03676100, "stib.b.untyped.1d.u32.3.imm.base0 r0.x, r0.w, 1"), /* stib.untyped.u32.1d.3.mode4.base0 r0.x, r0.w, 1 */ - - INSTR_6XX(c0240402_00674100, "stib.b.untyped.1d.u16.1.imm.base0 r0.z, r0.x, 2"), + /* cat0 */ + INSTR_6XX(00000000_00000000, "nop"), + INSTR_6XX(00000200_00000000, "(rpt2)nop"), + INSTR_6XX(03000000_00000000, "end"), + INSTR_6XX(00800000_00000004, "br p0.x, #4"), + INSTR_6XX(00800000_fffffffc, "br p0.x, #-4"), + INSTR_6XX(00900000_00000003, "br !p0.x, #3"), + INSTR_6XX(03820000_00000015, "shps #21"), /* emit */ + INSTR_6XX(04021000_00000000, "(ss)shpe"), /* cut */ + INSTR_6XX(02220000_00000004, "getlast.w8 #4"), + INSTR_6XX(02820000_00000014, "getone #20"), /* kill p0.x */ + INSTR_6XX(00906020_00000007, "brao !p0.x, !p0.y, #7"), + INSTR_6XX(00804040_00000003, "braa p0.x, p0.y, #3"), + INSTR_6XX(07820000_00000000, "prede"), + INSTR_6XX(00800063_0000001e, "brac.3 #30"), + INSTR_6XX(06820000_00000000, "predt p0.x"), + INSTR_6XX(07020000_00000000, "predf p0.x"), + INSTR_6XX(07820000_00000000, "prede"), + + /* cat1 */ + INSTR_6XX(20244000_00000020, "mov.f32f32 r0.x, c8.x"), + INSTR_6XX(20200000_00000020, "mov.f16f16 hr0.x, hc8.x"), + INSTR_6XX(20150000_00000000, "cov.s32s16 hr0.x, r0.x"), + INSTR_6XX(20156004_00000c11, "(ul)mov.s32s32 r1.x, c<a0.x + 17>"), + INSTR_6XX(201100f4_00000000, "mova a0.x, hr0.x"), + INSTR_6XX(20244905_00000410, "(rpt1)mov.f32f32 r1.y, (r)c260.x"), + INSTR_6XX(20174004_00000008, "mov.s32s32 r<a0.x + 4>, r2.x"), + INSTR_6XX(20130000_00000005, "mov.s16s16 hr<a0.x>, hr1.y"), + INSTR_6XX(20110004_00000800, "mov.s16s16 hr1.x, hr<a0.x>"), + /* dEQP-VK.subgroups.ballot.compute.compute */ + INSTR_6XX(260cc3c0_00000000, "movmsk.w128 r48.x"), /* movmsk.w128 sr48.x */ + + INSTR_6XX(240cc004_00030201, "swz.u32u32 
r1.x, r0.w, r0.y, r0.z"), + INSTR_6XX(2400c105_04030201, "gat.f16u32 r1.y, hr0.y, hr0.z, hr0.w, hr1.x"), + INSTR_6XX(240c0205_04030201, "sct.u32f16 hr1.y, hr0.z, hr0.w, hr1.x, r0.y"), + INSTR_6XX(2400c205_04030201, "sct.f16u32 r1.y, r0.z, r0.w, r1.x, hr0.y"), + + INSTR_6XX(20510005_0000ffff, "mov.s16s16 hr1.y, -1"), + INSTR_6XX(20400005_00003900, "mov.f16f16 hr1.y, h(0.625000)"), + INSTR_6XX(20400006_00003800, "mov.f16f16 hr1.z, h(0.500000)"), + INSTR_6XX(204880f5_00000000, "mova1 a1.x, 0"), + + /* cat2 */ + INSTR_6XX(40104002_0c210001, "add.f hr0.z, r0.y, c<a0.x + 33>"), + INSTR_6XX(40b80804_10408004, "(nop3) cmps.f.lt r1.x, (abs)r1.x, c16.x"), + INSTR_6XX(47308a02_00002000, "(rpt2)bary.f (ei)r0.z, (r)0, r0.x"), + INSTR_6XX(47348000_00002000, "flat.b (ei)r0.x, 0, r0.x"), + INSTR_6XX(43480801_00008001, "(nop3) absneg.s hr0.y, (abs)hr0.y"), + INSTR_6XX(50600004_2c010004, "(sy)mul.f hr1.x, hr1.x, h(0.5)"), + INSTR_6XX(42280807_27ff0000, "(nop3) add.s hr1.w, hr0.x, h(-1)"), + INSTR_6XX(40a500f8_2c000004, "cmps.f.ne p0.x, hr1.x, h(0.0)"), + INSTR_6XX(438000f8_20010009, "and.b p0.x, hr2.y, h(1)"), + INSTR_6XX(438000f9_00020001, "and.b p0.y, hr0.y, hr0.z"), + INSTR_6XX(40080902_50200006, "(rpt1)add.f hr0.z, (r)hr1.z, (neg)(r)hc8.x"), + INSTR_6XX(42380c01_00040001, "(sat)(nop3) add.s r0.y, r0.y, r1.x"), + INSTR_6XX(42480000_48801086, "(nop2) sub.u hr0.x, hc33.z, (neg)hr<a0.x + 128>"), + INSTR_6XX(46b00001_00001020, "clz.b r0.y, c8.x"), + INSTR_6XX(46700009_00000009, "bfrev.b r2.y, r2.y"), + + /* cat3 */ + INSTR_6XX(66000000_10421041, "sel.f16 hr0.x, hc16.y, hr0.x, hc16.z"), + INSTR_6XX(64848109_109a9099, "(rpt1)sel.b32 r2.y, c38.y, (r)r2.y, c38.z"), + INSTR_6XX(64810904_30521036, "(rpt1)sel.b32 r1.x, (r)c13.z, r0.z, (r)c20.z"), + INSTR_6XX(64818902_20041032, "(rpt1)sel.b32 r0.z, (r)c12.z, r0.w, (r)r1.x"), + INSTR_6XX(63820005_10315030, "mad.f32 r1.y, (neg)c12.x, r1.x, c12.y"), + INSTR_6XX(62050009_00091000, "mad.u24 r2.y, c0.x, r2.z, r2.y"), + INSTR_6XX(61828008_00081033, 
"madsh.m16 r2.x, c12.w, r1.y, r2.x"), + INSTR_6XX(65900820_100cb008, "(nop3) shlg hr8.x, 8, hr8.x, 12"), /* (nop3) shlg.b16 hr8.x, (r)8, (r)hr8.x, 12; */ + INSTR_6XX(65ae085c_0002a001, "(nop3) shlg hr23.x, hr0.y, hr23.x, hr0.z"), /* not seen in blob */ + INSTR_6XX(65900820_0c0aac05, "(nop3) shlg hr8.x, hc<a0.x + 5>, hr8.x, hc<a0.x + 10>"), /* not seen in blob */ + INSTR_6XX(65ae0c5c_0002a001, "(nop3) shlg r23.x, r0.y, r23.x, r0.z"), /* (nop3) shlg.b32 r23.x, (r)r0.y, (r)r23.x, r0.z */ + INSTR_6XX(64018802_0002e003, "(nop3) shrm hr0.z, (neg)hr0.w, hr0.w, hr0.z"), + INSTR_6XX(64818802_0002e003, "(nop3) shlm hr0.z, (neg)hr0.w, hr0.w, hr0.z"), + INSTR_6XX(65018802_0002e003, "(nop3) shrg hr0.z, (neg)hr0.w, hr0.w, hr0.z"), + INSTR_6XX(66018802_0002e003, "(nop3) andg hr0.z, (neg)hr0.w, hr0.w, hr0.z"), + INSTR_6XX(67018802_1002e003, "(nop3) wmm hr0.z, (neg)hr0.w, hr0.w, 2"), /* (nop3) wmm.f16f16 hr0.z, (abs)(r)hr0.w, (r)hr0.w, 2 */ + INSTR_6XX(67018c02_1002e003, "(nop3) wmm.accu hr0.z, (neg)hr0.w, hr0.w, 2"), + INSTR_6XX(6701c802_9002a003, "(nop3) wmm r0.z, r0.w, r0.w, 2"), /* (nop3) wmm.f32f32 r0.z, (r)r0.w, (r)r0.w, 2 */ + /* custom test with qcom_dot8 function from cl_qcom_dot_product8 */ + INSTR_6XX(66818c02_0002e003, "(sat)(nop3) dp2acc.mixed.low r0.z, r0.w, r0.w, r0.z"), /* (nop3) dp2acc (sat)r0.z, (signed)(low)(r)r0.w, (low)(r)r0.w, r0.z */ + INSTR_6XX(6681c802_8002a003, "(nop3) dp4acc.unsigned.low r0.z, r0.w, r0.w, (neg)r0.z"), /* (nop3) dp4acc r0.z, (unsigned)(r)r0.w, (r)r0.w, (neg)r0.z */ + + /* cat4 */ + INSTR_6XX(8010000a_00000003, "rcp r2.z, r0.w"), + + /* cat5 */ + /* dEQP-VK.glsl.derivate.dfdx.uniform_if.float_mediump */ + INSTR_6XX(a3801102_00000001, "dsx (f32)(x)r0.z, r0.x"), /* dsx (f32)(xOOO)r0.z, r0.x */ + /* dEQP-VK.glsl.derivate.dfdy.uniform_if.float_mediump */ + INSTR_6XX(a3c01102_00000001, "dsy (f32)(x)r0.z, r0.x"), /* dsy (f32)(xOOO)r0.z, r0.x */ + /* dEQP-VK.glsl.derivate.dfdxfine.uniform_loop.float_highp */ + INSTR_6XX(a6001105_00000001, "dsxpp.1 
(x)r1.y, r0.x"), /* dsxpp.1 (xOOO)r1.y, r0.x */ + INSTR_6XX(a6201105_00000001, "dsxpp.1.p (x)r1.y, r0.x"), /* dsxpp.1 (xOOO)r1.y, r0.x */ + + INSTR_6XX(a2802f00_00000001, "getsize (u16)(xyzw)hr0.x, r0.x, t#0"), + INSTR_6XX(a0c89f04_c4600005, "sam.base1 (f32)(xyzw)r1.x, r0.z, s#3, t#2"), /* sam.s2en.mode6.base1 (f32)(xyzw)r1.x, r0.z, 35 */ + INSTR_6XX(a1c85f00_c0200005, "getlod.base0 (s32)(xyzw)r0.x, r0.z, s#1, t#0"), /* getlod.s2en.mode6.base0 (s32)(xyzw)r0.x, r0.z, 1 */ + INSTR_6XX(a1000f00_00000004, "samb (f16)(xyzw)hr0.x, hr0.z, hr0.x, s#0, t#0"), + INSTR_6XX(a1000f00_00000003, "samb (f16)(xyzw)hr0.x, r0.y, r0.x, s#0, t#0"), + INSTR_6XX(a0c00f00_04400002, "sam (f16)(xyzw)hr0.x, hr0.y, s#2, t#2"), + INSTR_6XX(a6c02f00_00000000, "rgetinfo (u16)(xyzw)hr0.x"), + INSTR_6XX(a3482f08_c0000000, "getinfo.base0 (u16)(xyzw)hr2.x, t#0"), + /* dEQP-GLES31.functional.texture.texture_buffer.render.as_fragment_texture.buffer_size_65536 */ + INSTR_5XX(a2c03102_00000000, "getbuf (u32)(x)r0.z, t#0"), + INSTR_6XX(a0c81f00_e0200005, "sam.base0 (f32)(xyzw)r0.x, r0.z, s#1, a1.x"), + INSTR_6XX(a0c81108_e2000001, "sam.base0 (f32)(x)r2.x, r0.x, s#16, a1.x"), + INSTR_6XX(a048d107_cc080a07, "isaml.base3 (s32)(x)r1.w, r0.w, r1.y, s#0, t#6"), + + + /* dEQP-VK.subgroups.arithmetic.compute.subgroupadd_float */ + INSTR_6XX(a7c03102_00100003, "brcst.active.w8 (u32)(x)r0.z, r0.y"), /* brcst.active.w8 (u32)(xOOO)r0.z, r0.y */ + /* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_int */ + INSTR_6XX(b7e03107_00000401, "(sy)quad_shuffle.brcst (u32)(x)r1.w, r0.x, r0.z"), /* (sy)quad_shuffle.brcst (u32)(xOOO)r1.w, r0.x, r0.z */ + /* dEQP-VK.subgroups.quad.graphics.subgroupquadswapdiagonal_int */ + INSTR_6XX(b7e03104_00180001, "(sy)quad_shuffle.diag (u32)(x)r1.x, r0.x"), /* (sy)quad_shuffle.diag (u32)(xOOO)r1.x, r0.x */ + + /* cat6 */ + + INSTR_5XX(c6e60000_00010600, "ldgb.untyped.4d.u32.1 r0.x, g[0], r1.x, r0.x"), /* ldgb.a.untyped.1dtype.u32.1 r0.x, g[r1.x], r0.x, 0 */ + 
INSTR_5XX(d7660204_02000a01, "(sy)stib.typed.2d.u32.1 g[1], r0.x, r0.z, r1.x"), /* (sy)stib.a.u32.2d.1 g[r1.x], r0.x, r0.z, 1. r1.x is offset in ibo, r0.x is value*/ + /* dEQP-VK.image.load_store.1d_array.r8g8b8a8_unorm */ + INSTR_5XX(c1a20006_0600ba01, "ldib.typed.2d.f32.4 r1.z, g[0], r0.z, r1.z"), /* ldib.a.f32.2d.4 r1.z, g[r0.z], r1.z, 0. r0.z is offset in ibo as src. r1.z */ + /* dEQP-VK.image.load_store.3d.r32g32b32a32_sint */ + INSTR_5XX(c1aa0003_0500fc01, "ldib.typed.3d.s32.4 r0.w, g[0], r0.w, r1.y"), /* ldib.a.s32.3d.4 r0.w, g[r0.w], r1.y, 0. r0.w is offset in ibo as src, and dst */ + /* dEQP-VK.binding_model.shader_access.primary_cmd_buf.storage_image.vertex.descriptor_array.3d */ + INSTR_5XX(c1a20204_0401fc01, "ldib.typed.3d.f32.4 r1.x, g[1], r1.w, r1.x"), /* ldib.a.f32.3d.4 r1.x, g[r1.w], r1.x, 1 */ + /* dEQP-VK.binding_model.shader_access.secondary_cmd_buf.with_push.storage_texel_buffer.vertex_fragment.single_descriptor.offset_zero */ + INSTR_5XX(c1a20005_0501be01, "ldib.typed.4d.f32.4 r1.y, g[0], r1.z, r1.y"), /* ldib.a.f32.1dtype.4 r1.y, g[r1.z], r1.y, 0 */ + /* dEQP-VK.texture.filtering.cube.formats.r8g8b8a8_snorm_nearest */ + INSTR_5XX(c1a60200_0000ba01, "ldib.typed.2d.u32.4 r0.x, g[1], r0.z, r0.x"), /* ldib.a.u32.2d.4 r0.x, g[r0.z], r0.x, 1 */ + + // TODO is this a real instruction? Or float -6.0 ? 
+ // INSTR_6XX(c0c00000_00000000, "stg.f16 g[hr0.x], hr0.x, hr0.x", .parse_fail=true), + /* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */ + INSTR_6XX(c0d20906_02800004, "stg.a.f32 g[r1.x+(r1.z)<<2], r0.z, 2"), /* stg.a.f32 g[r1.x+(r1.z<<2)], r0.z, 2 */ + INSTR_6XX(c0da052e_01800042, "stg.a.s32 g[r0.z+(r11.z)<<2], r8.y, 1"), /* stg.a.s32 g[r0.z+(r11.z<<2)], r8.y, 1 */ + INSTR_6XX(c0dc052e_01800042, "stg.a.u8 g[r0.z+(r11.z)<<2], hr8.y, 1"), + INSTR_6XX(c0ca0505_03800042, "stg.s32 g[r0.z+5], r8.y, 3"), + INSTR_6XX(c0ca0500_03800042, "stg.s32 g[r0.z], r8.y, 3"), + INSTR_6XX(c0ca0531_03800242, "stg.s32 g[r0.z+305], r8.y, 3"), + INSTR_5XX(c0ce0100_02800000, "stg.s8 g[r0.x], hr0.x, 2"), + INSTR_5XX(c0c00100_02800000, "stg.f16 g[r0.x], hr0.x, 2"), + + /* Customely crafted */ + INSTR_6XX(c0d61104_01800228, "stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1"), + INSTR_6XX(c0d61104_01802628, "stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1"), + + INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */ + INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */ + INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"), + INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"), + INSTR_6XX(c0060003_0180c269, "ldg.u32 r0.w, g[r0.w+308], 1"), + INSTR_6XX(c0040003_0180c269, "ldg.u16 hr0.w, g[r0.w+308], 1"), + + /* Found in TCS/TES shaders of GTA V */ + INSTR_6XX(c0020007_03c1420f, "ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3"), /* ldg.a.f32 r1.w, g[r1.y+((r1.w+1)<<2)], 3 */ + + /* Customely crafted */ + INSTR_6XX(c0020007_03c1740f, "ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3"), + + INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */ + INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */ + INSTR_6XX(c0000006_01c18017, 
"ldg.a.f16 hr1.z, g[r1.z+(r2.w)<<2], 1"), + INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"), + INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"), + + /* dEQP-GLES3.functional.ubo.random.basic_arrays.0 */ + INSTR_6XX(c7020020_01800000, "stc.f32 c[32], r0.x, 1"), /* stc c[32], r0.x, 1 */ + /* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */ + INSTR_6XX(c7060020_03800000, "stc.u32 c[32], r0.x, 3"), /* stc c[32], r0.x, 3 */ + + /* custom */ + INSTR_6XX(c7060100_03800000, "stc.u32 c[a1.x], r0.x, 3"), /* stc c[a1.x], r0.x, 3 */ + INSTR_6XX(c7060120_03800000, "stc.u32 c[a1.x+32], r0.x, 3"), /* stc c[a1.x+32], r0.x, 3 */ + + /* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */ + INSTR_6XX(c0260200_03676100, "stib.b.untyped.1d.u32.3.imm.base0 r0.x, r0.w, 1"), /* stib.untyped.u32.1d.3.mode4.base0 r0.x, r0.w, 1 */ + + INSTR_6XX(c0240402_00674100, "stib.b.untyped.1d.u16.1.imm.base0 hr0.z, r0.x, 2"), #if 0 /* TODO blob sometimes/frequently sets b0, although there does not seem * to be an obvious pattern and our encoding never sets it. AFAICT it @@ -298,6 +332,13 @@ static const struct test { INSTR_6XX(c0260000_00c78080, "ldc.offset0.1.nonuniform r0.x, 0, r0.x"), /* ldc.1.mode2.base0 r0.x, 0, r0.x */ INSTR_6XX(c0260201_00c78080, "ldc.offset0.1.nonuniform r0.y, 0, r0.y"), /* ldc.1.mode2.base0 r0.y, 0, r0.y */ + /* a4xx-a5xx has the exact same instrs in + * dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.(dynamically_)uniform_fragment + * with no change based on the mode. Note that we can't decode this yet. 
+ */ + /* INSTR_4XX(c7860000_00810001), */ /* ldc.1 r0.x, g[r1.x], 0, r0.x */ + /* INSTR_5XX(c7860000_00800000), */ /* ldc.a.1 r0.x, g[r0.x], 0, r0.x */ + /* custom */ INSTR_6XX(c0260201_ffc78080, "ldc.offset0.1.nonuniform r0.y, 255, r0.y"), /* ldc.1.mode2.base0 r0.y, 255, r0.y */ @@ -307,6 +348,11 @@ static const struct test { INSTR_6XX(c0260000_00478400, "ldc.offset2.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */ INSTR_6XX(c0260000_00478600, "ldc.offset3.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */ + /* dEQP-VK.glsl.conditionals.if.if_else_vertex */ + INSTR_6XX(c0360000_00c78100, "ldc.1.k.imm.base0 c[a1.x], 0, 0"), /* ldc.1.k.mode4.base0 c[a1.x], 0, 0 */ + /* custom */ + INSTR_6XX(c0360003_00c78100, "ldc.4.k.imm.base0 c[a1.x], 0, 0"), /* ldc.4.k.mode4.base0 c[a1.x], 0, 0 */ + /* dEQP-VK.glsl.struct.local.nested_struct_array_dynamic_index_fragment */ INSTR_6XX(c1425b50_01803e02, "stp.f32 p[r11.y-176], r0.y, 1"), INSTR_6XX(c1425b98_02803e14, "stp.f32 p[r11.y-104], r2.z, 2"), @@ -318,14 +364,17 @@ static const struct test { /* Atomic: */ #if 0 /* TODO our encoding differs in b53 for these two */ - INSTR_5XX(c4d60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"), - INSTR_5XX(c4160205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"), + INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"), + INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"), #else - INSTR_5XX(c4f60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"), - INSTR_5XX(c4360205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"), + INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"), + INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"), #endif INSTR_6XX(d5c60003_03008001, "(sy)atomic.max.untyped.1d.u32.1.l r0.w, l[r0.z], 
r0.w"), + /* dEQP-VK.glsl.atomic_operations.add_unsigned_compute_reference */ + INSTR_6XX(c4160002_02000001, "atomic.g.add.untyped.1d.u32.1.g r0.z, r0.x, r0.z"), + /* Bindless atomic: */ INSTR_6XX(c03a0003_01640000, "atomic.b.add.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.add.g.s32.1d.mode0.base0 r0.w,r0.y,0 */ INSTR_6XX(c03a0003_01660000, "atomic.b.and.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.and.g.s32.1d.mode0.base0 r0.w,r0.y,0 */ @@ -333,10 +382,14 @@ static const struct test { /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_literal.fragment.sampler2d */ INSTR_6XX(a0c01f04_0cc00005, "sam (f32)(xyzw)r1.x, r0.z, s#6, t#6"), - /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.uniform.fragment.sampler2d (looks like maybe the compiler didn't figure out */ - INSTR_6XX(a0c81f07_0100000b, "sam.s2en (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode0 (f32)(xyzw)r1.w, r1.y, hr2.x */ + + /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.uniform.fragment.sampler2d */ + INSTR_4XX(a0c81f02_00800001, "sam.s2en.uniform (f32)(xyzw)r0.z, r0.x, hr1.x"), /* sam.s2en.mode0 (f32)(xyzw)r0.z, r0.x, hr1.x */ /* same for 5xx */ + INSTR_6XX(a0c81f07_0100000b, "sam.s2en.uniform (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode0 (f32)(xyzw)r1.w, r1.y, hr2.x */ + /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.dynamically_uniform.fragment.sampler2d */ - INSTR_6XX(a0c81f07_8100000b, "sam.s2en.uniform (f32)(xyzw)r1.w, r1.y, hr2.x", .parse_fail=true), /* sam.s2en.mode4 (f32)(xyzw)r1.w, r1.y, hr2.x */ + INSTR_4XX(a0c81f02_80800001, "sam.s2en.nonuniform (f32)(xyzw)r0.z, r0.x, hr1.x"), /* sam.s2en.uniform (f32)(xyzw)r0.z, r0.x, hr1.x */ /* same for 5xx */ + INSTR_6XX(a0c81f07_8100000b, "sam.s2en.nonuniform (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode4 (f32)(xyzw)r1.w, r1.y, hr2.x */ /* NonUniform: */ /* dEQP-VK.descriptor_indexing.storage_buffer */ @@ -349,6 +402,9 @@ static const struct test { /* 
dEQP-VK.descriptor_indexing.sampler */ INSTR_6XX(a0c81f00_40000005, "sam.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.x"), + /* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_int */ + INSTR_6XX(c0260001_00c98000, "getfiberid.u32 r0.y"), + /* Custom test since we've never seen the blob emit these. */ INSTR_6XX(c0260004_00490000, "getspid.u32 r1.x"), INSTR_6XX(c0260005_00494000, "getwid.u32 r1.y"), @@ -416,7 +472,6 @@ main(int argc, char **argv) printf(" Got: \"%s\"\n", disasm_output); retval = 1; decode_fails++; - continue; } /* @@ -426,7 +481,8 @@ main(int argc, char **argv) unsigned gen = test->gpu_id / 100; if (!compilers[gen]) { dev_ids[gen].gpu_id = test->gpu_id; - compilers[gen] = ir3_compiler_create(NULL, &dev_ids[gen], false); + compilers[gen] = ir3_compiler_create(NULL, &dev_ids[gen], + &(struct ir3_compiler_options){}); } FILE *fasm = diff --git a/lib/mesa/src/freedreno/registers/dsi/dsi_phy_5nm.xml b/lib/mesa/src/freedreno/registers/dsi/dsi_phy_5nm.xml deleted file mode 100644 index 7e3505b9e..000000000 --- a/lib/mesa/src/freedreno/registers/dsi/dsi_phy_5nm.xml +++ /dev/null @@ -1,228 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<database xmlns="http://nouveau.freedesktop.org/" -xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" -xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> -<import file="freedreno_copyright.xml"/> - -<domain name="DSI_5nm_PHY_CMN" width="32"> - <reg32 offset="0x00000" name="REVISION_ID0"/> - <reg32 offset="0x00004" name="REVISION_ID1"/> - <reg32 offset="0x00008" name="REVISION_ID2"/> - <reg32 offset="0x0000c" name="REVISION_ID3"/> - <reg32 offset="0x00010" name="CLK_CFG0"/> - <reg32 offset="0x00014" name="CLK_CFG1"/> - <reg32 offset="0x00018" name="GLBL_CTRL"/> - <reg32 offset="0x0001c" name="RBUF_CTRL"/> - <reg32 offset="0x00020" name="VREG_CTRL_0"/> - <reg32 offset="0x00024" name="CTRL_0"/> - <reg32 offset="0x00028" name="CTRL_1"/> - <reg32 offset="0x0002c" name="CTRL_2"/> - <reg32 
offset="0x00030" name="CTRL_3"/> - <reg32 offset="0x00034" name="LANE_CFG0"/> - <reg32 offset="0x00038" name="LANE_CFG1"/> - <reg32 offset="0x0003c" name="PLL_CNTRL"/> - <reg32 offset="0x00040" name="DPHY_SOT"/> - <reg32 offset="0x000a0" name="LANE_CTRL0"/> - <reg32 offset="0x000a4" name="LANE_CTRL1"/> - <reg32 offset="0x000a8" name="LANE_CTRL2"/> - <reg32 offset="0x000ac" name="LANE_CTRL3"/> - <reg32 offset="0x000b0" name="LANE_CTRL4"/> - <reg32 offset="0x000b4" name="TIMING_CTRL_0"/> - <reg32 offset="0x000b8" name="TIMING_CTRL_1"/> - <reg32 offset="0x000bc" name="TIMING_CTRL_2"/> - <reg32 offset="0x000c0" name="TIMING_CTRL_3"/> - <reg32 offset="0x000c4" name="TIMING_CTRL_4"/> - <reg32 offset="0x000c8" name="TIMING_CTRL_5"/> - <reg32 offset="0x000cc" name="TIMING_CTRL_6"/> - <reg32 offset="0x000d0" name="TIMING_CTRL_7"/> - <reg32 offset="0x000d4" name="TIMING_CTRL_8"/> - <reg32 offset="0x000d8" name="TIMING_CTRL_9"/> - <reg32 offset="0x000dc" name="TIMING_CTRL_10"/> - <reg32 offset="0x000e0" name="TIMING_CTRL_11"/> - <reg32 offset="0x000e4" name="TIMING_CTRL_12"/> - <reg32 offset="0x000e8" name="TIMING_CTRL_13"/> - <reg32 offset="0x000ec" name="GLBL_HSTX_STR_CTRL_0"/> - <reg32 offset="0x000f0" name="GLBL_HSTX_STR_CTRL_1"/> - <reg32 offset="0x000f4" name="GLBL_RESCODE_OFFSET_TOP_CTRL"/> - <reg32 offset="0x000f8" name="GLBL_RESCODE_OFFSET_BOT_CTRL"/> - <reg32 offset="0x000fc" name="GLBL_RESCODE_OFFSET_MID_CTRL"/> - <reg32 offset="0x00100" name="GLBL_LPTX_STR_CTRL"/> - <reg32 offset="0x00104" name="GLBL_PEMPH_CTRL_0"/> - <reg32 offset="0x00108" name="GLBL_PEMPH_CTRL_1"/> - <reg32 offset="0x0010c" name="GLBL_STR_SWI_CAL_SEL_CTRL"/> - <reg32 offset="0x00110" name="VREG_CTRL_1"/> - <reg32 offset="0x00114" name="CTRL_4"/> - <reg32 offset="0x00140" name="PHY_STATUS"/> - <reg32 offset="0x00148" name="LANE_STATUS0"/> - <reg32 offset="0x0014c" name="LANE_STATUS1"/> -</domain> - -<domain name="DSI_5nm_PHY" width="32"> - <array offset="0x00000" name="LN" length="5" 
stride="0x80"> - <reg32 offset="0x00" name="CFG0"/> - <reg32 offset="0x04" name="CFG1"/> - <reg32 offset="0x08" name="CFG2"/> - <reg32 offset="0x0c" name="TEST_DATAPATH"/> - <reg32 offset="0x10" name="PIN_SWAP"/> - <reg32 offset="0x14" name="LPRX_CTRL"/> - <reg32 offset="0x18" name="TX_DCTRL"/> - </array> -</domain> - -<domain name="DSI_5nm_PHY_PLL" width="32"> - <reg32 offset="0x0000" name="ANALOG_CONTROLS_ONE"/> - <reg32 offset="0x0004" name="ANALOG_CONTROLS_TWO"/> - <reg32 offset="0x0008" name="INT_LOOP_SETTINGS"/> - <reg32 offset="0x000c" name="INT_LOOP_SETTINGS_TWO"/> - <reg32 offset="0x0010" name="ANALOG_CONTROLS_THREE"/> - <reg32 offset="0x0014" name="ANALOG_CONTROLS_FOUR"/> - <reg32 offset="0x0018" name="ANALOG_CONTROLS_FIVE"/> - <reg32 offset="0x001c" name="INT_LOOP_CONTROLS"/> - <reg32 offset="0x0020" name="DSM_DIVIDER"/> - <reg32 offset="0x0024" name="FEEDBACK_DIVIDER"/> - <reg32 offset="0x0028" name="SYSTEM_MUXES"/> - <reg32 offset="0x002c" name="FREQ_UPDATE_CONTROL_OVERRIDES"/> - <reg32 offset="0x0030" name="CMODE"/> - <reg32 offset="0x0034" name="PSM_CTRL"/> - <reg32 offset="0x0038" name="RSM_CTRL"/> - <reg32 offset="0x003c" name="VCO_TUNE_MAP"/> - <reg32 offset="0x0040" name="PLL_CNTRL"/> - <reg32 offset="0x0044" name="CALIBRATION_SETTINGS"/> - <reg32 offset="0x0048" name="BAND_SEL_CAL_TIMER_LOW"/> - <reg32 offset="0x004c" name="BAND_SEL_CAL_TIMER_HIGH"/> - <reg32 offset="0x0050" name="BAND_SEL_CAL_SETTINGS"/> - <reg32 offset="0x0054" name="BAND_SEL_MIN"/> - <reg32 offset="0x0058" name="BAND_SEL_MAX"/> - <reg32 offset="0x005c" name="BAND_SEL_PFILT"/> - <reg32 offset="0x0060" name="BAND_SEL_IFILT"/> - <reg32 offset="0x0064" name="BAND_SEL_CAL_SETTINGS_TWO"/> - <reg32 offset="0x0068" name="BAND_SEL_CAL_SETTINGS_THREE"/> - <reg32 offset="0x006c" name="BAND_SEL_CAL_SETTINGS_FOUR"/> - <reg32 offset="0x0070" name="BAND_SEL_ICODE_HIGH"/> - <reg32 offset="0x0074" name="BAND_SEL_ICODE_LOW"/> - <reg32 offset="0x0078" name="FREQ_DETECT_SETTINGS_ONE"/> - <reg32 
offset="0x007c" name="FREQ_DETECT_THRESH"/> - <reg32 offset="0x0080" name="FREQ_DET_REFCLK_HIGH"/> - <reg32 offset="0x0084" name="FREQ_DET_REFCLK_LOW"/> - <reg32 offset="0x0088" name="FREQ_DET_PLLCLK_HIGH"/> - <reg32 offset="0x008c" name="FREQ_DET_PLLCLK_LOW"/> - <reg32 offset="0x0090" name="PFILT"/> - <reg32 offset="0x0094" name="IFILT"/> - <reg32 offset="0x0098" name="PLL_GAIN"/> - <reg32 offset="0x009c" name="ICODE_LOW"/> - <reg32 offset="0x00a0" name="ICODE_HIGH"/> - <reg32 offset="0x00a4" name="LOCKDET"/> - <reg32 offset="0x00a8" name="OUTDIV"/> - <reg32 offset="0x00ac" name="FASTLOCK_CONTROL"/> - <reg32 offset="0x00b0" name="PASS_OUT_OVERRIDE_ONE"/> - <reg32 offset="0x00b4" name="PASS_OUT_OVERRIDE_TWO"/> - <reg32 offset="0x00b8" name="CORE_OVERRIDE"/> - <reg32 offset="0x00bc" name="CORE_INPUT_OVERRIDE"/> - <reg32 offset="0x00c0" name="RATE_CHANGE"/> - <reg32 offset="0x00c4" name="PLL_DIGITAL_TIMERS"/> - <reg32 offset="0x00c8" name="PLL_DIGITAL_TIMERS_TWO"/> - <reg32 offset="0x00cc" name="DECIMAL_DIV_START"/> - <reg32 offset="0x00d0" name="FRAC_DIV_START_LOW"/> - <reg32 offset="0x00d4" name="FRAC_DIV_START_MID"/> - <reg32 offset="0x00d8" name="FRAC_DIV_START_HIGH"/> - <reg32 offset="0x00dc" name="DEC_FRAC_MUXES"/> - <reg32 offset="0x00e0" name="DECIMAL_DIV_START_1"/> - <reg32 offset="0x00e4" name="FRAC_DIV_START_LOW_1"/> - <reg32 offset="0x00e8" name="FRAC_DIV_START_MID_1"/> - <reg32 offset="0x00ec" name="FRAC_DIV_START_HIGH_1"/> - <reg32 offset="0x00f0" name="DECIMAL_DIV_START_2"/> - <reg32 offset="0x00f4" name="FRAC_DIV_START_LOW_2"/> - <reg32 offset="0x00f8" name="FRAC_DIV_START_MID_2"/> - <reg32 offset="0x00fc" name="FRAC_DIV_START_HIGH_2"/> - <reg32 offset="0x0100" name="MASH_CONTROL"/> - <reg32 offset="0x0104" name="SSC_STEPSIZE_LOW"/> - <reg32 offset="0x0108" name="SSC_STEPSIZE_HIGH"/> - <reg32 offset="0x010c" name="SSC_DIV_PER_LOW"/> - <reg32 offset="0x0110" name="SSC_DIV_PER_HIGH"/> - <reg32 offset="0x0114" name="SSC_ADJPER_LOW"/> - <reg32 
offset="0x0118" name="SSC_ADJPER_HIGH"/> - <reg32 offset="0x011c" name="SSC_MUX_CONTROL"/> - <reg32 offset="0x0120" name="SSC_STEPSIZE_LOW_1"/> - <reg32 offset="0x0124" name="SSC_STEPSIZE_HIGH_1"/> - <reg32 offset="0x0128" name="SSC_DIV_PER_LOW_1"/> - <reg32 offset="0x012c" name="SSC_DIV_PER_HIGH_1"/> - <reg32 offset="0x0130" name="SSC_ADJPER_LOW_1"/> - <reg32 offset="0x0134" name="SSC_ADJPER_HIGH_1"/> - <reg32 offset="0x0138" name="SSC_STEPSIZE_LOW_2"/> - <reg32 offset="0x013c" name="SSC_STEPSIZE_HIGH_2"/> - <reg32 offset="0x0140" name="SSC_DIV_PER_LOW_2"/> - <reg32 offset="0x0144" name="SSC_DIV_PER_HIGH_2"/> - <reg32 offset="0x0148" name="SSC_ADJPER_LOW_2"/> - <reg32 offset="0x014c" name="SSC_ADJPER_HIGH_2"/> - <reg32 offset="0x0150" name="SSC_CONTROL"/> - <reg32 offset="0x0154" name="PLL_OUTDIV_RATE"/> - <reg32 offset="0x0158" name="PLL_LOCKDET_RATE_1"/> - <reg32 offset="0x015c" name="PLL_LOCKDET_RATE_2"/> - <reg32 offset="0x0160" name="PLL_PROP_GAIN_RATE_1"/> - <reg32 offset="0x0164" name="PLL_PROP_GAIN_RATE_2"/> - <reg32 offset="0x0168" name="PLL_BAND_SEL_RATE_1"/> - <reg32 offset="0x016c" name="PLL_BAND_SEL_RATE_2"/> - <reg32 offset="0x0170" name="PLL_INT_GAIN_IFILT_BAND_1"/> - <reg32 offset="0x0174" name="PLL_INT_GAIN_IFILT_BAND_2"/> - <reg32 offset="0x0178" name="PLL_FL_INT_GAIN_PFILT_BAND_1"/> - <reg32 offset="0x017c" name="PLL_FL_INT_GAIN_PFILT_BAND_2"/> - <reg32 offset="0x0180" name="PLL_FASTLOCK_EN_BAND"/> - <reg32 offset="0x0184" name="FREQ_TUNE_ACCUM_INIT_MID"/> - <reg32 offset="0x0188" name="FREQ_TUNE_ACCUM_INIT_HIGH"/> - <reg32 offset="0x018c" name="FREQ_TUNE_ACCUM_INIT_MUX"/> - <reg32 offset="0x0190" name="PLL_LOCK_OVERRIDE"/> - <reg32 offset="0x0194" name="PLL_LOCK_DELAY"/> - <reg32 offset="0x0198" name="PLL_LOCK_MIN_DELAY"/> - <reg32 offset="0x019c" name="CLOCK_INVERTERS"/> - <reg32 offset="0x01a0" name="SPARE_AND_JPC_OVERRIDES"/> - <reg32 offset="0x01a4" name="BIAS_CONTROL_1"/> - <reg32 offset="0x01a8" name="BIAS_CONTROL_2"/> - <reg32 
offset="0x01ac" name="ALOG_OBSV_BUS_CTRL_1"/> - <reg32 offset="0x01b0" name="COMMON_STATUS_ONE"/> - <reg32 offset="0x01b4" name="COMMON_STATUS_TWO"/> - <reg32 offset="0x01b8" name="BAND_SEL_CAL"/> - <reg32 offset="0x01bc" name="ICODE_ACCUM_STATUS_LOW"/> - <reg32 offset="0x01c0" name="ICODE_ACCUM_STATUS_HIGH"/> - <reg32 offset="0x01c4" name="FD_OUT_LOW"/> - <reg32 offset="0x01c8" name="FD_OUT_HIGH"/> - <reg32 offset="0x01cc" name="ALOG_OBSV_BUS_STATUS_1"/> - <reg32 offset="0x01d0" name="PLL_MISC_CONFIG"/> - <reg32 offset="0x01d4" name="FLL_CONFIG"/> - <reg32 offset="0x01d8" name="FLL_FREQ_ACQ_TIME"/> - <reg32 offset="0x01dc" name="FLL_CODE0"/> - <reg32 offset="0x01e0" name="FLL_CODE1"/> - <reg32 offset="0x01e4" name="FLL_GAIN0"/> - <reg32 offset="0x01e8" name="FLL_GAIN1"/> - <reg32 offset="0x01ec" name="SW_RESET"/> - <reg32 offset="0x01f0" name="FAST_PWRUP"/> - <reg32 offset="0x01f4" name="LOCKTIME0"/> - <reg32 offset="0x01f8" name="LOCKTIME1"/> - <reg32 offset="0x01fc" name="DEBUG_BUS_SEL"/> - <reg32 offset="0x0200" name="DEBUG_BUS0"/> - <reg32 offset="0x0204" name="DEBUG_BUS1"/> - <reg32 offset="0x0208" name="DEBUG_BUS2"/> - <reg32 offset="0x020c" name="DEBUG_BUS3"/> - <reg32 offset="0x0210" name="ANALOG_FLL_CONTROL_OVERRIDES"/> - <reg32 offset="0x0214" name="VCO_CONFIG"/> - <reg32 offset="0x0218" name="VCO_CAL_CODE1_MODE0_STATUS"/> - <reg32 offset="0x021c" name="VCO_CAL_CODE1_MODE1_STATUS"/> - <reg32 offset="0x0220" name="RESET_SM_STATUS"/> - <reg32 offset="0x0224" name="TDC_OFFSET"/> - <reg32 offset="0x0228" name="PS3_PWRDOWN_CONTROLS"/> - <reg32 offset="0x022c" name="PS4_PWRDOWN_CONTROLS"/> - <reg32 offset="0x0230" name="PLL_RST_CONTROLS"/> - <reg32 offset="0x0234" name="GEAR_BAND_SELECT_CONTROLS"/> - <reg32 offset="0x0238" name="PSM_CLK_CONTROLS"/> - <reg32 offset="0x023c" name="SYSTEM_MUXES_2"/> - <reg32 offset="0x0240" name="VCO_CONFIG_1"/> - <reg32 offset="0x0244" name="VCO_CONFIG_2"/> - <reg32 offset="0x0248" name="CLOCK_INVERTERS_1"/> - <reg32 
offset="0x024c" name="CLOCK_INVERTERS_2"/> - <reg32 offset="0x0250" name="CMODE_1"/> - <reg32 offset="0x0254" name="CMODE_2"/> - <reg32 offset="0x0258" name="ANALOG_CONTROLS_FIVE_1"/> - <reg32 offset="0x025c" name="ANALOG_CONTROLS_FIVE_2"/> - <reg32 offset="0x0260" name="PERF_OPTIMIZE"/> -</domain> - -</database> diff --git a/lib/mesa/src/freedreno/vulkan/tu_clear_blit.c b/lib/mesa/src/freedreno/vulkan/tu_clear_blit.c index 8d38a8fd0..6caa31beb 100644 --- a/lib/mesa/src/freedreno/vulkan/tu_clear_blit.c +++ b/lib/mesa/src/freedreno/vulkan/tu_clear_blit.c @@ -30,27 +30,27 @@ tu_pack_float32_for_unorm(float val, int bits) /* r2d_ = BLIT_OP_SCALE operations */ static enum a6xx_2d_ifmt -format_to_ifmt(VkFormat format) +format_to_ifmt(enum pipe_format format) { - if (format == VK_FORMAT_D24_UNORM_S8_UINT || - format == VK_FORMAT_X8_D24_UNORM_PACK32) + if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT || + format == PIPE_FORMAT_Z24X8_UNORM) return R2D_UNORM8; /* get_component_bits doesn't work with depth/stencil formats: */ - if (format == VK_FORMAT_D16_UNORM || format == VK_FORMAT_D32_SFLOAT) + if (format == PIPE_FORMAT_Z16_UNORM || format == PIPE_FORMAT_Z32_FLOAT) return R2D_FLOAT32; - if (format == VK_FORMAT_S8_UINT) + if (format == PIPE_FORMAT_S8_UINT) return R2D_INT8; /* use the size of the red channel to find the corresponding "ifmt" */ - bool is_int = vk_format_is_int(format); - switch (vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) { + bool is_int = util_format_is_pure_integer(format); + switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) { case 4: case 5: case 8: return is_int ? R2D_INT8 : R2D_UNORM8; case 10: case 11: return is_int ? R2D_INT16 : R2D_FLOAT16; case 16: - if (vk_format_is_float(format)) + if (util_format_is_float(format)) return R2D_FLOAT16; return is_int ? 
R2D_INT16 : R2D_FLOAT32; case 32: @@ -82,38 +82,38 @@ r2d_coords(struct tu_cs *cs, } static void -r2d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val) +r2d_clear_value(struct tu_cs *cs, enum pipe_format format, const VkClearValue *val) { uint32_t clear_value[4] = {}; switch (format) { - case VK_FORMAT_X8_D24_UNORM_PACK32: - case VK_FORMAT_D24_UNORM_S8_UINT: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_Z24X8_UNORM: /* cleared as r8g8b8a8_unorm using special format */ clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24); clear_value[1] = clear_value[0] >> 8; clear_value[2] = clear_value[0] >> 16; clear_value[3] = val->depthStencil.stencil; break; - case VK_FORMAT_D16_UNORM: - case VK_FORMAT_D32_SFLOAT: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z32_FLOAT: /* R2D_FLOAT32 */ clear_value[0] = fui(val->depthStencil.depth); break; - case VK_FORMAT_S8_UINT: + case PIPE_FORMAT_S8_UINT: clear_value[0] = val->depthStencil.stencil; break; - case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: + case PIPE_FORMAT_R9G9B9E5_FLOAT: /* cleared as UINT32 */ clear_value[0] = float3_to_rgb9e5(val->color.float32); break; default: - assert(!vk_format_is_depth_or_stencil(format)); - const struct util_format_description *desc = vk_format_description(format); + assert(!util_format_is_depth_or_stencil(format)); + const struct util_format_description *desc = util_format_description(format); enum a6xx_2d_ifmt ifmt = format_to_ifmt(format); assert(desc && (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN || - format == VK_FORMAT_B10G11R11_UFLOAT_PACK32)); + format == PIPE_FORMAT_R11G11B10_FLOAT)); for (unsigned i = 0; i < desc->nr_channels; i++) { const struct util_format_channel_description *ch = &desc->channel[i]; @@ -144,7 +144,7 @@ r2d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val) static void r2d_src(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - const struct tu_image_view *iview, + const struct fdl6_view *iview, uint32_t 
layer, VkFilter filter) { @@ -162,6 +162,24 @@ r2d_src(struct tu_cmd_buffer *cmd, } static void +r2d_src_depth(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + const struct tu_image_view *iview, + uint32_t layer, + VkFilter filter) +{ + tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5); + tu_cs_emit(cs, tu_image_view_depth(iview, SP_PS_2D_SRC_INFO)); + tu_cs_emit(cs, iview->view.SP_PS_2D_SRC_SIZE); + tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer); + /* SP_PS_2D_SRC_PITCH has shifted pitch field */ + tu_cs_emit(cs, iview->depth_PITCH << 9); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS, 3); + tu_cs_image_flag_ref(cs, &iview->view, layer); +} + +static void r2d_src_stencil(struct tu_cmd_buffer *cmd, struct tu_cs *cs, const struct tu_image_view *iview, @@ -170,7 +188,7 @@ r2d_src_stencil(struct tu_cmd_buffer *cmd, { tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5); tu_cs_emit(cs, tu_image_view_stencil(iview, SP_PS_2D_SRC_INFO) & ~A6XX_SP_PS_2D_SRC_INFO_FLAGS); - tu_cs_emit(cs, iview->SP_PS_2D_SRC_SIZE); + tu_cs_emit(cs, iview->view.SP_PS_2D_SRC_SIZE); tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer); /* SP_PS_2D_SRC_PITCH has shifted pitch field */ tu_cs_emit(cs, iview->stencil_PITCH << 9); @@ -179,17 +197,17 @@ r2d_src_stencil(struct tu_cmd_buffer *cmd, static void r2d_src_buffer(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - VkFormat vk_format, + enum pipe_format format, uint64_t va, uint32_t pitch, uint32_t width, uint32_t height) { - struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR); + struct tu_native_format fmt = tu6_format_texture(format, TILE6_LINEAR); tu_cs_emit_regs(cs, A6XX_SP_PS_2D_SRC_INFO( - .color_format = format.fmt, - .color_swap = format.swap, - .srgb = vk_format_is_srgb(vk_format), + .color_format = fmt.fmt, + .color_swap = fmt.swap, + .srgb = util_format_is_srgb(format), .unk20 = 1, .unk22 = 1), A6XX_SP_PS_2D_SRC_SIZE(.width = width, .height = height), 
@@ -198,7 +216,7 @@ r2d_src_buffer(struct tu_cmd_buffer *cmd, } static void -r2d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) +r2d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer) { tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4); tu_cs_emit(cs, iview->RB_2D_DST_INFO); @@ -209,6 +227,18 @@ r2d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) } static void +r2d_dst_depth(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) +{ + tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4); + tu_cs_emit(cs, tu_image_view_depth(iview, RB_2D_DST_INFO)); + tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer); + tu_cs_emit(cs, iview->depth_PITCH); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS, 3); + tu_cs_image_flag_ref(cs, &iview->view, layer); +} + +static void r2d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) { tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4); @@ -218,15 +248,15 @@ r2d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t la } static void -r2d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch) +r2d_dst_buffer(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch) { - struct tu_native_format format = tu6_format_color(vk_format, TILE6_LINEAR); + struct tu_native_format fmt = tu6_format_color(format, TILE6_LINEAR); tu_cs_emit_regs(cs, A6XX_RB_2D_DST_INFO( - .color_format = format.fmt, - .color_swap = format.swap, - .srgb = vk_format_is_srgb(vk_format)), + .color_format = fmt.fmt, + .color_swap = fmt.swap, + .srgb = util_format_is_srgb(format)), A6XX_RB_2D_DST(.qword = va), A6XX_RB_2D_DST_PITCH(pitch)); } @@ -234,24 +264,25 @@ r2d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch static void r2d_setup_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - VkFormat vk_format, + enum pipe_format format, VkImageAspectFlags aspect_mask, 
unsigned blit_param, bool clear, bool ubwc, bool scissor) { - enum a6xx_format format = tu6_base_format(vk_format); - enum a6xx_2d_ifmt ifmt = format_to_ifmt(vk_format); + enum a6xx_format fmt = tu6_base_format(format); + enum a6xx_2d_ifmt ifmt = format_to_ifmt(format); + uint32_t unknown_8c01 = 0; - if ((vk_format == VK_FORMAT_D24_UNORM_S8_UINT || - vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) && ubwc) { - format = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; + if ((format == PIPE_FORMAT_Z24_UNORM_S8_UINT || + format == PIPE_FORMAT_Z24X8_UNORM) && ubwc) { + fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; } /* note: the only format with partial clearing is D24S8 */ - if (vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { + if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { /* preserve stencil channel */ if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) unknown_8c01 = 0x08000041; @@ -267,10 +298,10 @@ r2d_setup_common(struct tu_cmd_buffer *cmd, .scissor = scissor, .rotate = blit_param, .solid_color = clear, - .d24s8 = format == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear, - .color_format = format, + .d24s8 = fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear, + .color_format = fmt, .mask = 0xf, - .ifmt = vk_format_is_srgb(vk_format) ? R2D_UNORM8_SRGB : ifmt, + .ifmt = util_format_is_srgb(format) ? 
R2D_UNORM8_SRGB : ifmt, ).value; tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); @@ -279,21 +310,21 @@ r2d_setup_common(struct tu_cmd_buffer *cmd, tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); tu_cs_emit(cs, blit_cntl); - if (format == FMT6_10_10_10_2_UNORM_DEST) - format = FMT6_16_16_16_16_FLOAT; + if (fmt == FMT6_10_10_10_2_UNORM_DEST) + fmt = FMT6_16_16_16_16_FLOAT; tu_cs_emit_regs(cs, A6XX_SP_2D_DST_FORMAT( - .sint = vk_format_is_sint(vk_format), - .uint = vk_format_is_uint(vk_format), - .color_format = format, - .srgb = vk_format_is_srgb(vk_format), + .sint = util_format_is_pure_sint(format), + .uint = util_format_is_pure_uint(format), + .color_format = fmt, + .srgb = util_format_is_srgb(format), .mask = 0xf)); } static void r2d_setup(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - VkFormat vk_format, + enum pipe_format format, VkImageAspectFlags aspect_mask, unsigned blit_param, bool clear, @@ -302,9 +333,11 @@ r2d_setup(struct tu_cmd_buffer *cmd, { assert(samples == VK_SAMPLE_COUNT_1_BIT); - tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM); + if (!cmd->state.pass) { + tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM); + } - r2d_setup_common(cmd, cs, vk_format, aspect_mask, blit_param, clear, ubwc, false); + r2d_setup_common(cmd, cs, format, aspect_mask, blit_param, clear, ubwc, false); } static void @@ -546,21 +579,25 @@ compile_shader(struct tu_device *dev, struct nir_shader *nir, ir3_finalize_nir(dev->compiler, nir); - struct ir3_shader *sh = ir3_shader_from_nir(dev->compiler, nir, - align(consts, 4), NULL); + struct ir3_shader *sh = + ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) { + .api_wavesize = IR3_SINGLE_OR_DOUBLE, + .real_wavesize = IR3_SINGLE_OR_DOUBLE, + .reserved_user_consts = align(consts, 4), + }, NULL); struct ir3_shader_key key = {}; bool created; struct ir3_shader_variant *so = ir3_shader_get_variant(sh, &key, false, false, &created); - struct tu6_global *global = dev->global_bo.map; + struct tu6_global 
*global = dev->global_bo->map; assert(*offset + so->info.sizedwords <= ARRAY_SIZE(global->shaders)); dev->global_shaders[idx] = so; memcpy(&global->shaders[*offset], so->bin, sizeof(uint32_t) * so->info.sizedwords); - dev->global_shader_va[idx] = dev->global_bo.iova + + dev->global_shader_va[idx] = dev->global_bo->iova + gb_offset(shaders[*offset]); *offset += align(so->info.sizedwords, 32); } @@ -749,7 +786,7 @@ r3d_coords(struct tu_cs *cs, } static void -r3d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val) +r3d_clear_value(struct tu_cs *cs, enum pipe_format format, const VkClearValue *val) { tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4); tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | @@ -760,8 +797,8 @@ r3d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val) tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); switch (format) { - case VK_FORMAT_X8_D24_UNORM_PACK32: - case VK_FORMAT_D24_UNORM_S8_UINT: { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: { /* cleared as r8g8b8a8_unorm using special format */ uint32_t tmp = tu_pack_float32_for_unorm(val->depthStencil.depth, 24); tu_cs_emit(cs, fui((tmp & 0xff) / 255.0f)); @@ -769,14 +806,14 @@ r3d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val) tu_cs_emit(cs, fui((tmp >> 16 & 0xff) / 255.0f)); tu_cs_emit(cs, fui((val->depthStencil.stencil & 0xff) / 255.0f)); } break; - case VK_FORMAT_D16_UNORM: - case VK_FORMAT_D32_SFLOAT: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z32_FLOAT: tu_cs_emit(cs, fui(val->depthStencil.depth)); tu_cs_emit(cs, 0); tu_cs_emit(cs, 0); tu_cs_emit(cs, 0); break; - case VK_FORMAT_S8_UINT: + case PIPE_FORMAT_S8_UINT: tu_cs_emit(cs, val->depthStencil.stencil & 0xff); tu_cs_emit(cs, 0); tu_cs_emit(cs, 0); @@ -784,7 +821,7 @@ r3d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val) break; default: /* as color formats use clear value as-is */ 
- assert(!vk_format_is_depth_or_stencil(format)); + assert(!util_format_is_depth_or_stencil(format)); tu_cs_emit_array(cs, val->color.uint32, 4); break; } @@ -823,7 +860,6 @@ r3d_src_common(struct tu_cmd_buffer *cmd, A6XX_TEX_SAMP_0_WRAP_R(A6XX_TEX_CLAMP_TO_EDGE) | 0x60000; /* XXX used by blob, doesn't seem necessary */ texture.map[A6XX_TEX_CONST_DWORDS + 1] = - 0x1 | /* XXX used by blob, doesn't seem necessary */ A6XX_TEX_SAMP_1_UNNORM_COORDS | A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR; texture.map[A6XX_TEX_CONST_DWORDS + 2] = 0; @@ -854,7 +890,7 @@ r3d_src_common(struct tu_cmd_buffer *cmd, static void r3d_src(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - const struct tu_image_view *iview, + const struct fdl6_view *iview, uint32_t layer, VkFilter filter) { @@ -867,23 +903,23 @@ r3d_src(struct tu_cmd_buffer *cmd, static void r3d_src_buffer(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - VkFormat vk_format, + enum pipe_format format, uint64_t va, uint32_t pitch, uint32_t width, uint32_t height) { uint32_t desc[A6XX_TEX_CONST_DWORDS]; - struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR); + struct tu_native_format fmt = tu6_format_texture(format, TILE6_LINEAR); desc[0] = - COND(vk_format_is_srgb(vk_format), A6XX_TEX_CONST_0_SRGB) | - A6XX_TEX_CONST_0_FMT(format.fmt) | - A6XX_TEX_CONST_0_SWAP(format.swap) | + COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) | + A6XX_TEX_CONST_0_FMT(fmt.fmt) | + A6XX_TEX_CONST_0_SWAP(fmt.swap) | A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) | // XXX to swizzle into .w for stencil buffer_to_image - A6XX_TEX_CONST_0_SWIZ_Y(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Y) | - A6XX_TEX_CONST_0_SWIZ_Z(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Z) | - A6XX_TEX_CONST_0_SWIZ_W(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_W); + A6XX_TEX_CONST_0_SWIZ_Y(format == PIPE_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Y) | + A6XX_TEX_CONST_0_SWIZ_Z(format == PIPE_FORMAT_R8_UNORM ? 
A6XX_TEX_X : A6XX_TEX_Z) | + A6XX_TEX_CONST_0_SWIZ_W(format == PIPE_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_W); desc[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height); desc[2] = A6XX_TEX_CONST_2_PITCH(pitch) | @@ -901,16 +937,22 @@ static void r3d_src_gmem(struct tu_cmd_buffer *cmd, struct tu_cs *cs, const struct tu_image_view *iview, - VkFormat format, + enum pipe_format format, uint32_t gmem_offset, uint32_t cpp) { uint32_t desc[A6XX_TEX_CONST_DWORDS]; - memcpy(desc, iview->descriptor, sizeof(desc)); - - /* patch the format so that depth/stencil get the right format */ - desc[0] &= ~A6XX_TEX_CONST_0_FMT__MASK; - desc[0] |= A6XX_TEX_CONST_0_FMT(tu6_format_texture(format, TILE6_2).fmt); + memcpy(desc, iview->view.descriptor, sizeof(desc)); + + /* patch the format so that depth/stencil get the right format and swizzle */ + desc[0] &= ~(A6XX_TEX_CONST_0_FMT__MASK | + A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK | + A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK); + desc[0] |= A6XX_TEX_CONST_0_FMT(tu6_format_texture(format, TILE6_2).fmt) | + A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) | + A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) | + A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) | + A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W); /* patched for gmem */ desc[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK); @@ -928,7 +970,7 @@ r3d_src_gmem(struct tu_cmd_buffer *cmd, } static void -r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) +r3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer) { tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6); tu_cs_emit(cs, iview->RB_MRT_BUF_INFO); @@ -938,10 +980,29 @@ r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3); tu_cs_image_flag_ref(cs, iview, layer); + /* Use color format from RB_MRT_BUF_INFO. This register is relevant for + * FMT6_NV12_Y. 
+ */ + tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = iview->RB_MRT_BUF_INFO & 0xff)); + tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled)); } static void +r3d_dst_depth(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) +{ + tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6); + tu_cs_emit(cs, tu_image_view_depth(iview, RB_MRT_BUF_INFO)); + tu_cs_image_depth_ref(cs, iview, layer); + tu_cs_emit(cs, 0); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3); + tu_cs_image_flag_ref(cs, &iview->view, layer); + + tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->view.ubwc_enabled)); +} + +static void r3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) { tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6); @@ -953,12 +1014,12 @@ r3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t la } static void -r3d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch) +r3d_dst_buffer(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch) { - struct tu_native_format format = tu6_format_color(vk_format, TILE6_LINEAR); + struct tu_native_format fmt = tu6_format_color(format, TILE6_LINEAR); tu_cs_emit_regs(cs, - A6XX_RB_MRT_BUF_INFO(0, .color_format = format.fmt, .color_swap = format.swap), + A6XX_RB_MRT_BUF_INFO(0, .color_format = fmt.fmt, .color_swap = fmt.swap), A6XX_RB_MRT_PITCH(0, pitch), A6XX_RB_MRT_ARRAY_PITCH(0, 0), A6XX_RB_MRT_BASE(0, .qword = va), @@ -968,14 +1029,14 @@ r3d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch } static uint8_t -aspect_write_mask(VkFormat vk_format, VkImageAspectFlags aspect_mask) +aspect_write_mask(enum pipe_format format, VkImageAspectFlags aspect_mask) { uint8_t mask = 0xf; assert(aspect_mask); /* note: the only format with partial writing is D24S8, * clear/blit uses the _AS_R8G8B8A8 format to access it */ - if (vk_format == 
VK_FORMAT_D24_UNORM_S8_UINT) { + if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) mask = 0x7; if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) @@ -987,18 +1048,18 @@ aspect_write_mask(VkFormat vk_format, VkImageAspectFlags aspect_mask) static void r3d_setup(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - VkFormat vk_format, + enum pipe_format format, VkImageAspectFlags aspect_mask, unsigned blit_param, bool clear, bool ubwc, VkSampleCountFlagBits samples) { - enum a6xx_format format = tu6_base_format(vk_format); + enum a6xx_format fmt = tu6_base_format(format); - if ((vk_format == VK_FORMAT_D24_UNORM_S8_UINT || - vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) && ubwc) { - format = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; + if ((format == PIPE_FORMAT_Z24_UNORM_S8_UINT || + format == PIPE_FORMAT_Z24X8_UNORM) && ubwc) { + fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; } if (!cmd->state.pass) { @@ -1036,14 +1097,14 @@ r3d_setup(struct tu_cmd_buffer *cmd, tu_cs_emit_regs(cs, A6XX_SP_FS_RENDER_COMPONENTS(.rt0 = 0xf)); tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(0, - .color_format = format, - .color_sint = vk_format_is_sint(vk_format), - .color_uint = vk_format_is_uint(vk_format))); + .color_format = fmt, + .color_sint = util_format_is_pure_sint(format), + .color_uint = util_format_is_pure_uint(format))); tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0, - .component_enable = aspect_write_mask(vk_format, aspect_mask))); - tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(vk_format_is_srgb(vk_format))); - tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(vk_format_is_srgb(vk_format))); + .component_enable = aspect_write_mask(format, aspect_mask))); + tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(util_format_is_srgb(format))); + tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(util_format_is_srgb(format))); tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0)); tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0)); @@ -1084,22 +1145,24 @@ struct blit_ops { const VkOffset2D *dst, const VkOffset2D *src, const VkExtent2D 
*extent); - void (*clear_value)(struct tu_cs *cs, VkFormat format, const VkClearValue *val); + void (*clear_value)(struct tu_cs *cs, enum pipe_format format, const VkClearValue *val); void (*src)( struct tu_cmd_buffer *cmd, struct tu_cs *cs, - const struct tu_image_view *iview, + const struct fdl6_view *iview, uint32_t layer, VkFilter filter); void (*src_buffer)(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - VkFormat vk_format, + enum pipe_format format, uint64_t va, uint32_t pitch, uint32_t width, uint32_t height); - void (*dst)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer); - void (*dst_buffer)(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch); + void (*dst)(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer); + void (*dst_depth)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer); + void (*dst_stencil)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer); + void (*dst_buffer)(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch); void (*setup)(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - VkFormat vk_format, + enum pipe_format format, VkImageAspectFlags aspect_mask, unsigned blit_param, /* CmdBlitImage: rotation in 2D path and z scaling in 3D path */ bool clear, @@ -1116,6 +1179,8 @@ static const struct blit_ops r2d_ops = { .src = r2d_src, .src_buffer = r2d_src_buffer, .dst = r2d_dst, + .dst_depth = r2d_dst_depth, + .dst_stencil = r2d_dst_stencil, .dst_buffer = r2d_dst_buffer, .setup = r2d_setup, .run = r2d_run, @@ -1128,6 +1193,8 @@ static const struct blit_ops r3d_ops = { .src = r3d_src, .src_buffer = r3d_src_buffer, .dst = r3d_dst, + .dst_depth = r3d_dst_depth, + .dst_stencil = r3d_dst_stencil, .dst_buffer = r3d_dst_buffer, .setup = r3d_setup, .run = r3d_run, @@ -1150,76 +1217,53 @@ coords(const struct blit_ops *ops, * compression behavior, so no just returning R8_UINT/R16_UINT/R32_UINT for * everything. 
*/ -static VkFormat -copy_format(VkFormat format, VkImageAspectFlags aspect_mask, bool copy_buffer) -{ - if (vk_format_is_compressed(format)) { - switch (vk_format_get_blocksize(format)) { - case 1: return VK_FORMAT_R8_UINT; - case 2: return VK_FORMAT_R16_UINT; - case 4: return VK_FORMAT_R32_UINT; - case 8: return VK_FORMAT_R32G32_UINT; - case 16:return VK_FORMAT_R32G32B32A32_UINT; +static enum pipe_format +copy_format(VkFormat vk_format, VkImageAspectFlags aspect_mask, bool copy_buffer) +{ + if (vk_format_is_compressed(vk_format)) { + switch (vk_format_get_blocksize(vk_format)) { + case 1: return PIPE_FORMAT_R8_UINT; + case 2: return PIPE_FORMAT_R16_UINT; + case 4: return PIPE_FORMAT_R32_UINT; + case 8: return PIPE_FORMAT_R32G32_UINT; + case 16:return PIPE_FORMAT_R32G32B32A32_UINT; default: unreachable("unhandled format size"); } } - switch (format) { + enum pipe_format format = tu_vk_format_to_pipe_format(vk_format); + /* For SNORM formats, copy them as the equivalent UNORM format. If we treat * them as snorm then the 0x80 (-1.0 snorm8) value will get clamped to 0x81 * (also -1.0), when we're supposed to be memcpying the bits. See * https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2917 for discussion. 
*/ - case VK_FORMAT_R8_SNORM: - return VK_FORMAT_R8_UNORM; - case VK_FORMAT_R8G8_SNORM: - return VK_FORMAT_R8G8_UNORM; - case VK_FORMAT_R8G8B8_SNORM: - return VK_FORMAT_R8G8B8_UNORM; - case VK_FORMAT_B8G8R8_SNORM: - return VK_FORMAT_B8G8R8_UNORM; - case VK_FORMAT_R8G8B8A8_SNORM: - return VK_FORMAT_R8G8B8A8_UNORM; - case VK_FORMAT_B8G8R8A8_SNORM: - return VK_FORMAT_B8G8R8A8_UNORM; - case VK_FORMAT_A8B8G8R8_SNORM_PACK32: - return VK_FORMAT_A8B8G8R8_UNORM_PACK32; - case VK_FORMAT_A2R10G10B10_SNORM_PACK32: - return VK_FORMAT_A2R10G10B10_UNORM_PACK32; - case VK_FORMAT_A2B10G10R10_SNORM_PACK32: - return VK_FORMAT_A2B10G10R10_UNORM_PACK32; - case VK_FORMAT_R16_SNORM: - return VK_FORMAT_R16_UNORM; - case VK_FORMAT_R16G16_SNORM: - return VK_FORMAT_R16G16_UNORM; - case VK_FORMAT_R16G16B16_SNORM: - return VK_FORMAT_R16G16B16_UNORM; - case VK_FORMAT_R16G16B16A16_SNORM: - return VK_FORMAT_R16G16B16A16_UNORM; - - case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: - return VK_FORMAT_R32_UINT; - - case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: + format = util_format_snorm_to_unorm(format); + + switch (format) { + case PIPE_FORMAT_R9G9B9E5_FLOAT: + return PIPE_FORMAT_R32_UINT; + + case PIPE_FORMAT_G8_B8R8_420_UNORM: if (aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT) - return VK_FORMAT_R8G8_UNORM; + return PIPE_FORMAT_R8G8_UNORM; else - return VK_FORMAT_R8_UNORM; - case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: - return VK_FORMAT_R8_UNORM; + return PIPE_FORMAT_Y8_UNORM; + case PIPE_FORMAT_G8_B8_R8_420_UNORM: + return PIPE_FORMAT_R8_UNORM; - case VK_FORMAT_D24_UNORM_S8_UINT: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT && copy_buffer) - return VK_FORMAT_R8_UNORM; + return PIPE_FORMAT_R8_UNORM; else return format; - case VK_FORMAT_D32_SFLOAT_S8_UINT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) - return VK_FORMAT_S8_UINT; + return PIPE_FORMAT_S8_UINT; assert(aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT); - return 
VK_FORMAT_D32_SFLOAT; + return PIPE_FORMAT_Z32_FLOAT; default: return format; @@ -1234,11 +1278,11 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd, { const struct blit_ops *ops = &r2d_ops; - ops->setup(cmd, cs, VK_FORMAT_D16_UNORM, VK_IMAGE_ASPECT_DEPTH_BIT, 0, true, false, + ops->setup(cmd, cs, PIPE_FORMAT_Z16_UNORM, VK_IMAGE_ASPECT_DEPTH_BIT, 0, true, false, VK_SAMPLE_COUNT_1_BIT); - ops->clear_value(cs, VK_FORMAT_D16_UNORM, value); - ops->dst_buffer(cs, VK_FORMAT_D16_UNORM, - image->bo->iova + image->bo_offset + image->lrz_offset, + ops->clear_value(cs, PIPE_FORMAT_Z16_UNORM, value); + ops->dst_buffer(cs, PIPE_FORMAT_Z16_UNORM, + image->iova + image->lrz_offset, image->lrz_pitch * 2); ops->coords(cs, &(VkOffset2D) {}, NULL, &(VkExtent2D) {image->lrz_pitch, image->lrz_height}); ops->run(cmd, cs); @@ -1246,9 +1290,9 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd, } static void -tu_image_view_copy_blit(struct tu_image_view *iview, +tu_image_view_copy_blit(struct fdl6_view *iview, struct tu_image *image, - VkFormat format, + enum pipe_format format, const VkImageSubresourceLayers *subres, uint32_t layer, bool stencil_read, @@ -1257,53 +1301,58 @@ tu_image_view_copy_blit(struct tu_image_view *iview, VkImageAspectFlags aspect_mask = subres->aspectMask; /* always use the AS_R8G8B8A8 format for these */ - if (format == VK_FORMAT_D24_UNORM_S8_UINT || - format == VK_FORMAT_X8_D24_UNORM_PACK32) { + if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT || + format == PIPE_FORMAT_Z24X8_UNORM) { aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT; } - tu_image_view_init(iview, &(VkImageViewCreateInfo) { - .image = tu_image_to_handle(image), - .viewType = z_scale ? VK_IMAGE_VIEW_TYPE_3D : VK_IMAGE_VIEW_TYPE_2D, - .format = format, - /* image_to_buffer from d24s8 with stencil aspect mask writes out to r8 */ - .components.r = stencil_read ? 
VK_COMPONENT_SWIZZLE_A : VK_COMPONENT_SWIZZLE_R, - .subresourceRange = { - .aspectMask = aspect_mask, - .baseMipLevel = subres->mipLevel, - .levelCount = 1, - .baseArrayLayer = subres->baseArrayLayer + layer, - .layerCount = 1, + const struct fdl_layout *layout = + &image->layout[tu6_plane_index(image->vk_format, aspect_mask)]; + + fdl6_view_init(iview, &layout, &(struct fdl_view_args) { + .iova = image->iova, + .base_array_layer = subres->baseArrayLayer + layer, + .layer_count = 1, + .base_miplevel = subres->mipLevel, + .level_count = 1, + .format = tu_format_for_aspect(format, aspect_mask), + .swiz = { + /* image_to_buffer from d24s8 with stencil aspect mask writes out to r8 */ + stencil_read ? PIPE_SWIZZLE_W : PIPE_SWIZZLE_X, + PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }, + .type = z_scale ? FDL_VIEW_TYPE_3D : FDL_VIEW_TYPE_2D, }, false); } static void -tu_image_view_copy(struct tu_image_view *iview, +tu_image_view_copy(struct fdl6_view *iview, struct tu_image *image, - VkFormat format, + enum pipe_format format, const VkImageSubresourceLayers *subres, uint32_t layer, bool stencil_read) { - format = copy_format(format, subres->aspectMask, false); tu_image_view_copy_blit(iview, image, format, subres, layer, stencil_read, false); } static void -tu_image_view_blit(struct tu_image_view *iview, +tu_image_view_blit(struct fdl6_view *iview, struct tu_image *image, const VkImageSubresourceLayers *subres, uint32_t layer) { - tu_image_view_copy_blit(iview, image, image->vk_format, subres, layer, false, false); + enum pipe_format format = + tu6_plane_format(image->vk_format, tu6_plane_index(image->vk_format, + subres->aspectMask)); + tu_image_view_copy_blit(iview, image, format, subres, layer, false, false); } static void tu6_blit_image(struct tu_cmd_buffer *cmd, struct tu_image *src_image, struct tu_image *dst_image, - const VkImageBlit *info, + const VkImageBlit2KHR *info, VkFilter filter) { const struct blit_ops *ops = &r2d_ops; @@ -1375,7 +1424,7 @@ 
tu6_blit_image(struct tu_cmd_buffer *cmd, trace_start_blit(&cmd->trace, cs); - ops->setup(cmd, cs, format, info->dstSubresource.aspectMask, + ops->setup(cmd, cs, tu_vk_format_to_pipe_format(format), info->dstSubresource.aspectMask, blit_param, false, dst_image->layout[0].ubwc, dst_image->layout[0].nr_samples); @@ -1399,12 +1448,16 @@ tu6_blit_image(struct tu_cmd_buffer *cmd, A6XX_GRAS_2D_SRC_BR_Y(MAX2(info->srcOffsets[0].y, info->srcOffsets[1].y) - 1)); } - struct tu_image_view dst, src; + struct fdl6_view dst, src; tu_image_view_blit(&dst, dst_image, &info->dstSubresource, MIN2(info->dstOffsets[0].z, info->dstOffsets[1].z)); if (z_scale) { - tu_image_view_copy_blit(&src, src_image, src_image->vk_format, + enum pipe_format src_format = + tu6_plane_format(src_image->vk_format, + tu6_plane_index(src_image->vk_format, + info->srcSubresource.aspectMask)); + tu_image_view_copy_blit(&src, src_image, src_format, &info->srcSubresource, 0, false, true); ops->src(cmd, cs, &src, 0, filter); } else { @@ -1432,35 +1485,30 @@ tu6_blit_image(struct tu_cmd_buffer *cmd, } VKAPI_ATTR void VKAPI_CALL -tu_CmdBlitImage(VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage dstImage, - VkImageLayout dstImageLayout, - uint32_t regionCount, - const VkImageBlit *pRegions, - VkFilter filter) +tu_CmdBlitImage2KHR(VkCommandBuffer commandBuffer, + const VkBlitImageInfo2KHR* pBlitImageInfo) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_image, src_image, srcImage); - TU_FROM_HANDLE(tu_image, dst_image, dstImage); + TU_FROM_HANDLE(tu_image, src_image, pBlitImageInfo->srcImage); + TU_FROM_HANDLE(tu_image, dst_image, pBlitImageInfo->dstImage); - for (uint32_t i = 0; i < regionCount; ++i) { + for (uint32_t i = 0; i < pBlitImageInfo->regionCount; ++i) { /* can't blit both depth and stencil at once with D32_S8 * TODO: more advanced 3D blit path to support it instead? 
*/ if (src_image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT || dst_image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - VkImageBlit region = pRegions[i]; - u_foreach_bit(b, pRegions[i].dstSubresource.aspectMask) { + VkImageBlit2KHR region = pBlitImageInfo->pRegions[i]; + u_foreach_bit(b, region.dstSubresource.aspectMask) { region.srcSubresource.aspectMask = BIT(b); region.dstSubresource.aspectMask = BIT(b); - tu6_blit_image(cmd, src_image, dst_image, &region, filter); + tu6_blit_image(cmd, src_image, dst_image, &region, pBlitImageInfo->filter); } continue; } - tu6_blit_image(cmd, src_image, dst_image, pRegions + i, filter); + tu6_blit_image(cmd, src_image, dst_image, pBlitImageInfo->pRegions + i, + pBlitImageInfo->filter); } } @@ -1494,12 +1542,14 @@ static void tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd, struct tu_buffer *src_buffer, struct tu_image *dst_image, - const VkBufferImageCopy *info) + const VkBufferImageCopy2KHR *info) { struct tu_cs *cs = &cmd->cs; uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount); - VkFormat src_format = + enum pipe_format src_format = copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, true); + enum pipe_format dst_format = + copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, false); const struct blit_ops *ops = &r2d_ops; /* special case for buffer to stencil */ @@ -1508,9 +1558,9 @@ tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd, ops = &r3d_ops; } - /* TODO: G8_B8R8_2PLANE_420_UNORM Y plane has different hardware format, - * which matters for UBWC. 
buffer_to_image/etc can fail because of this - */ + /* note: could use "R8_UNORM" when no UBWC */ + if (src_format == PIPE_FORMAT_Y8_UNORM) + ops = &r3d_ops; VkOffset3D offset = info->imageOffset; VkExtent3D extent = info->imageExtent; @@ -1519,24 +1569,23 @@ tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd, copy_compressed(dst_image->vk_format, &offset, &extent, &src_width, &src_height); - uint32_t pitch = src_width * vk_format_get_blocksize(src_format); + uint32_t pitch = src_width * util_format_get_blocksize(src_format); uint32_t layer_size = src_height * pitch; - ops->setup(cmd, cs, - copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, false), + ops->setup(cmd, cs, dst_format, info->imageSubresource.aspectMask, 0, false, dst_image->layout[0].ubwc, dst_image->layout[0].nr_samples); - struct tu_image_view dst; - tu_image_view_copy(&dst, dst_image, dst_image->vk_format, &info->imageSubresource, offset.z, false); + struct fdl6_view dst; + tu_image_view_copy(&dst, dst_image, dst_format, &info->imageSubresource, offset.z, false); for (uint32_t i = 0; i < layers; i++) { ops->dst(cs, &dst, i); - uint64_t src_va = tu_buffer_iova(src_buffer) + info->bufferOffset + layer_size * i; + uint64_t src_va = src_buffer->iova + info->bufferOffset + layer_size * i; if ((src_va & 63) || (pitch & 63)) { for (uint32_t y = 0; y < extent.height; y++) { - uint32_t x = (src_va & 63) / vk_format_get_blocksize(src_format); + uint32_t x = (src_va & 63) / util_format_get_blocksize(src_format); ops->src_buffer(cmd, cs, src_format, src_va & ~63, pitch, x + extent.width, 1); ops->coords(cs, &(VkOffset2D){offset.x, offset.y + y}, &(VkOffset2D){x}, @@ -1555,39 +1604,43 @@ tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd, } VKAPI_ATTR void VKAPI_CALL -tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkImage dstImage, - VkImageLayout dstImageLayout, - uint32_t regionCount, - const VkBufferImageCopy *pRegions) 
+tu_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2KHR *pCopyBufferToImageInfo) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_image, dst_image, dstImage); - TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer); + TU_FROM_HANDLE(tu_image, dst_image, pCopyBufferToImageInfo->dstImage); + TU_FROM_HANDLE(tu_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer); - for (unsigned i = 0; i < regionCount; ++i) - tu_copy_buffer_to_image(cmd, src_buffer, dst_image, pRegions + i); + for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; ++i) + tu_copy_buffer_to_image(cmd, src_buffer, dst_image, + pCopyBufferToImageInfo->pRegions + i); } static void tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd, struct tu_image *src_image, struct tu_buffer *dst_buffer, - const VkBufferImageCopy *info) + const VkBufferImageCopy2KHR *info) { struct tu_cs *cs = &cmd->cs; uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount); - VkFormat dst_format = + enum pipe_format dst_format = copy_format(src_image->vk_format, info->imageSubresource.aspectMask, true); + enum pipe_format src_format = + copy_format(src_image->vk_format, info->imageSubresource.aspectMask, false); + const struct blit_ops *ops = &r2d_ops; bool stencil_read = false; if (src_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT && info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) { + ops = &r3d_ops; stencil_read = true; } - const struct blit_ops *ops = stencil_read ? 
&r3d_ops : &r2d_ops; + /* note: could use "R8_UNORM" when no UBWC */ + if (dst_format == PIPE_FORMAT_Y8_UNORM) + ops = &r3d_ops; + VkOffset3D offset = info->imageOffset; VkExtent3D extent = info->imageExtent; uint32_t dst_width = info->bufferRowLength ?: extent.width; @@ -1595,22 +1648,22 @@ tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd, copy_compressed(src_image->vk_format, &offset, &extent, &dst_width, &dst_height); - uint32_t pitch = dst_width * vk_format_get_blocksize(dst_format); + uint32_t pitch = dst_width * util_format_get_blocksize(dst_format); uint32_t layer_size = pitch * dst_height; ops->setup(cmd, cs, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false, VK_SAMPLE_COUNT_1_BIT); - struct tu_image_view src; - tu_image_view_copy(&src, src_image, src_image->vk_format, &info->imageSubresource, offset.z, stencil_read); + struct fdl6_view src; + tu_image_view_copy(&src, src_image, src_format, &info->imageSubresource, offset.z, stencil_read); for (uint32_t i = 0; i < layers; i++) { ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST); - uint64_t dst_va = tu_buffer_iova(dst_buffer) + info->bufferOffset + layer_size * i; + uint64_t dst_va = dst_buffer->iova + info->bufferOffset + layer_size * i; if ((dst_va & 63) || (pitch & 63)) { for (uint32_t y = 0; y < extent.height; y++) { - uint32_t x = (dst_va & 63) / vk_format_get_blocksize(dst_format); + uint32_t x = (dst_va & 63) / util_format_get_blocksize(dst_format); ops->dst_buffer(cs, dst_format, dst_va & ~63, 0); ops->coords(cs, &(VkOffset2D) {x}, &(VkOffset2D){offset.x, offset.y + y}, &(VkExtent2D) {extent.width, 1}); @@ -1628,19 +1681,16 @@ tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd, } VKAPI_ATTR void VKAPI_CALL -tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkBuffer dstBuffer, - uint32_t regionCount, - const VkBufferImageCopy *pRegions) +tu_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2KHR* 
pCopyImageToBufferInfo) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_image, src_image, srcImage); - TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer); + TU_FROM_HANDLE(tu_image, src_image, pCopyImageToBufferInfo->srcImage); + TU_FROM_HANDLE(tu_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer); - for (unsigned i = 0; i < regionCount; ++i) - tu_copy_image_to_buffer(cmd, src_image, dst_buffer, pRegions + i); + for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; ++i) + tu_copy_image_to_buffer(cmd, src_image, dst_buffer, + pCopyImageToBufferInfo->pRegions + i); } /* Tiled formats don't support swapping, which means that we can't support @@ -1654,7 +1704,7 @@ tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, */ static bool -is_swapped_format(VkFormat format) +is_swapped_format(enum pipe_format format) { struct tu_native_format linear = tu6_format_texture(format, TILE6_LINEAR); struct tu_native_format tiled = tu6_format_texture(format, TILE6_3); @@ -1676,7 +1726,7 @@ static void tu_copy_image_to_image(struct tu_cmd_buffer *cmd, struct tu_image *src_image, struct tu_image *dst_image, - const VkImageCopy *info) + const VkImageCopy2KHR *info) { const struct blit_ops *ops = &r2d_ops; struct tu_cs *cs = &cmd->cs; @@ -1684,7 +1734,7 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd, if (dst_image->layout[0].nr_samples > 1) ops = &r3d_ops; - VkFormat format = VK_FORMAT_UNDEFINED; + enum pipe_format format = PIPE_FORMAT_NONE; VkOffset3D src_offset = info->srcOffset; VkOffset3D dst_offset = info->dstOffset; VkExtent3D extent = info->extent; @@ -1709,8 +1759,13 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd, copy_compressed(src_image->vk_format, &src_offset, &extent, NULL, NULL); copy_compressed(dst_image->vk_format, &dst_offset, NULL, NULL, NULL); - VkFormat dst_format = copy_format(dst_image->vk_format, info->dstSubresource.aspectMask, false); - VkFormat src_format = copy_format(src_image->vk_format, 
info->srcSubresource.aspectMask, false); + enum pipe_format dst_format = copy_format(dst_image->vk_format, info->dstSubresource.aspectMask, false); + enum pipe_format src_format = copy_format(src_image->vk_format, info->srcSubresource.aspectMask, false); + + /* note: could use "R8_UNORM" when no UBWC */ + if (dst_format == PIPE_FORMAT_Y8_UNORM || + src_format == PIPE_FORMAT_Y8_UNORM) + ops = &r3d_ops; bool use_staging_blit = false; @@ -1748,54 +1803,50 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd, use_staging_blit = true; } - struct tu_image_view dst, src; + struct fdl6_view dst, src; if (use_staging_blit) { tu_image_view_copy(&dst, dst_image, dst_format, &info->dstSubresource, dst_offset.z, false); tu_image_view_copy(&src, src_image, src_format, &info->srcSubresource, src_offset.z, false); - struct tu_image staging_image = { - .base.type = VK_OBJECT_TYPE_IMAGE, - .vk_format = src_format, - .level_count = 1, - .layer_count = info->srcSubresource.layerCount, - .bo_offset = 0, - }; - - VkImageSubresourceLayers staging_subresource = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = info->srcSubresource.layerCount, - }; - + struct fdl_layout staging_layout = { 0 }; VkOffset3D staging_offset = { 0 }; - staging_image.layout[0].tile_mode = TILE6_LINEAR; - staging_image.layout[0].ubwc = false; + staging_layout.tile_mode = TILE6_LINEAR; + staging_layout.ubwc = false; - fdl6_layout(&staging_image.layout[0], - vk_format_to_pipe_format(staging_image.vk_format), + fdl6_layout(&staging_layout, + src_format, src_image->layout[0].nr_samples, extent.width, extent.height, extent.depth, - staging_image.level_count, - staging_image.layer_count, + 1, + info->srcSubresource.layerCount, extent.depth > 1, NULL); + struct tu_bo *staging_bo; VkResult result = tu_get_scratch_bo(cmd->device, - staging_image.layout[0].size, - &staging_image.bo); + staging_layout.size, + &staging_bo); if (result != VK_SUCCESS) { cmd->record_result = 
result; return; } - struct tu_image_view staging; - tu_image_view_copy(&staging, &staging_image, src_format, - &staging_subresource, 0, false); + struct fdl6_view staging; + const struct fdl_layout *staging_layout_ptr = &staging_layout; + fdl6_view_init(&staging, &staging_layout_ptr, &(struct fdl_view_args) { + .iova = staging_bo->iova, + .base_array_layer = 0, + .layer_count = 1, + .base_miplevel = 0, + .level_count = info->srcSubresource.layerCount, + .format = tu_format_for_aspect(src_format, VK_IMAGE_ASPECT_COLOR_BIT), + .swiz = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }, + .type = FDL_VIEW_TYPE_2D, + }, false); ops->setup(cmd, cs, src_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false, dst_image->layout[0].nr_samples); @@ -1814,8 +1865,16 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd, tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); tu_cs_emit_wfi(cs); - tu_image_view_copy(&staging, &staging_image, dst_format, - &staging_subresource, 0, false); + fdl6_view_init(&staging, &staging_layout_ptr, &(struct fdl_view_args) { + .iova = staging_bo->iova, + .base_array_layer = 0, + .layer_count = 1, + .base_miplevel = 0, + .level_count = info->srcSubresource.layerCount, + .format = tu_format_for_aspect(dst_format, VK_IMAGE_ASPECT_COLOR_BIT), + .swiz = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }, + .type = FDL_VIEW_TYPE_2D, + }, false); ops->setup(cmd, cs, dst_format, info->dstSubresource.aspectMask, 0, false, dst_image->layout[0].ubwc, @@ -1847,22 +1906,17 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd, } VKAPI_ATTR void VKAPI_CALL -tu_CmdCopyImage(VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageCopy *pRegions) +tu_CmdCopyImage2KHR(VkCommandBuffer commandBuffer, + const VkCopyImageInfo2KHR* pCopyImageInfo) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_image, 
src_image, srcImage); - TU_FROM_HANDLE(tu_image, dst_image, destImage); + TU_FROM_HANDLE(tu_image, src_image, pCopyImageInfo->srcImage); + TU_FROM_HANDLE(tu_image, dst_image, pCopyImageInfo->dstImage); - for (uint32_t i = 0; i < regionCount; ++i) { + for (uint32_t i = 0; i < pCopyImageInfo->regionCount; ++i) { if (src_image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - VkImageCopy info = pRegions[i]; - u_foreach_bit(b, pRegions[i].dstSubresource.aspectMask) { + VkImageCopy2KHR info = pCopyImageInfo->pRegions[i]; + u_foreach_bit(b, info.dstSubresource.aspectMask) { info.srcSubresource.aspectMask = BIT(b); info.dstSubresource.aspectMask = BIT(b); tu_copy_image_to_image(cmd, src_image, dst_image, &info); @@ -1870,7 +1924,8 @@ tu_CmdCopyImage(VkCommandBuffer commandBuffer, continue; } - tu_copy_image_to_image(cmd, src_image, dst_image, pRegions + i); + tu_copy_image_to_image(cmd, src_image, dst_image, + pCopyImageInfo->pRegions + i); } } @@ -1883,7 +1938,7 @@ copy_buffer(struct tu_cmd_buffer *cmd, { const struct blit_ops *ops = &r2d_ops; struct tu_cs *cs = &cmd->cs; - VkFormat format = block_size == 4 ? VK_FORMAT_R32_UINT : VK_FORMAT_R8_UNORM; + enum pipe_format format = block_size == 4 ? 
PIPE_FORMAT_R32_UINT : PIPE_FORMAT_R8_UNORM; uint64_t blocks = size / block_size; ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false, @@ -1908,21 +1963,19 @@ copy_buffer(struct tu_cmd_buffer *cmd, } VKAPI_ATTR void VKAPI_CALL -tu_CmdCopyBuffer(VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkBuffer dstBuffer, - uint32_t regionCount, - const VkBufferCopy *pRegions) +tu_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2KHR *pCopyBufferInfo) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer); - TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer); + TU_FROM_HANDLE(tu_buffer, src_buffer, pCopyBufferInfo->srcBuffer); + TU_FROM_HANDLE(tu_buffer, dst_buffer, pCopyBufferInfo->dstBuffer); - for (unsigned i = 0; i < regionCount; ++i) { + for (unsigned i = 0; i < pCopyBufferInfo->regionCount; ++i) { + const VkBufferCopy2KHR *region = &pCopyBufferInfo->pRegions[i]; copy_buffer(cmd, - tu_buffer_iova(dst_buffer) + pRegions[i].dstOffset, - tu_buffer_iova(src_buffer) + pRegions[i].srcOffset, - pRegions[i].size, 1); + dst_buffer->iova + region->dstOffset, + src_buffer->iova + region->srcOffset, + region->size, 1); } } @@ -1944,7 +1997,7 @@ tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer, } memcpy(tmp.map, pData, dataSize); - copy_buffer(cmd, tu_buffer_iova(buffer) + dstOffset, tmp.iova, dataSize, 4); + copy_buffer(cmd, buffer->iova + dstOffset, tmp.iova, dataSize, 4); } VKAPI_ATTR void VKAPI_CALL @@ -1962,18 +2015,18 @@ tu_CmdFillBuffer(VkCommandBuffer commandBuffer, if (fillSize == VK_WHOLE_SIZE) fillSize = buffer->size - dstOffset; - uint64_t dst_va = tu_buffer_iova(buffer) + dstOffset; + uint64_t dst_va = buffer->iova + dstOffset; uint32_t blocks = fillSize / 4; - ops->setup(cmd, cs, VK_FORMAT_R32_UINT, VK_IMAGE_ASPECT_COLOR_BIT, 0, true, false, + ops->setup(cmd, cs, PIPE_FORMAT_R32_UINT, VK_IMAGE_ASPECT_COLOR_BIT, 0, true, false, VK_SAMPLE_COUNT_1_BIT); - ops->clear_value(cs, 
VK_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}}); + ops->clear_value(cs, PIPE_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}}); while (blocks) { uint32_t dst_x = (dst_va & 63) / 4; uint32_t width = MIN2(blocks, 0x4000 - dst_x); - ops->dst_buffer(cs, VK_FORMAT_R32_UINT, dst_va & ~63, 0); + ops->dst_buffer(cs, PIPE_FORMAT_R32_UINT, dst_va & ~63, 0); ops->coords(cs, &(VkOffset2D) {dst_x}, NULL, &(VkExtent2D) {width, 1}); ops->run(cmd, cs); @@ -1985,25 +2038,21 @@ tu_CmdFillBuffer(VkCommandBuffer commandBuffer, } VKAPI_ATTR void VKAPI_CALL -tu_CmdResolveImage(VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage dstImage, - VkImageLayout dstImageLayout, - uint32_t regionCount, - const VkImageResolve *pRegions) +tu_CmdResolveImage2KHR(VkCommandBuffer commandBuffer, + const VkResolveImageInfo2KHR* pResolveImageInfo) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_image, src_image, srcImage); - TU_FROM_HANDLE(tu_image, dst_image, dstImage); + TU_FROM_HANDLE(tu_image, src_image, pResolveImageInfo->srcImage); + TU_FROM_HANDLE(tu_image, dst_image, pResolveImageInfo->dstImage); const struct blit_ops *ops = &r2d_ops; struct tu_cs *cs = &cmd->cs; - ops->setup(cmd, cs, dst_image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT, - 0, false, dst_image->layout[0].ubwc, VK_SAMPLE_COUNT_1_BIT); + ops->setup(cmd, cs, tu_vk_format_to_pipe_format(dst_image->vk_format), + VK_IMAGE_ASPECT_COLOR_BIT, 0, false, dst_image->layout[0].ubwc, + VK_SAMPLE_COUNT_1_BIT); - for (uint32_t i = 0; i < regionCount; ++i) { - const VkImageResolve *info = &pRegions[i]; + for (uint32_t i = 0; i < pResolveImageInfo->regionCount; ++i) { + const VkImageResolve2KHR *info = &pResolveImageInfo->pRegions[i]; uint32_t layers = MAX2(info->extent.depth, info->dstSubresource.layerCount); assert(info->srcSubresource.layerCount == info->dstSubresource.layerCount); @@ -2011,7 +2060,7 @@ tu_CmdResolveImage(VkCommandBuffer commandBuffer, 
coords(ops, cs, &info->dstOffset, &info->srcOffset, &info->extent); - struct tu_image_view dst, src; + struct fdl6_view dst, src; tu_image_view_blit(&dst, dst_image, &info->dstSubresource, info->dstOffset.z); tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffset.z); @@ -2040,23 +2089,29 @@ resolve_sysmem(struct tu_cmd_buffer *cmd, uint32_t layer_mask, uint32_t layers, const VkRect2D *rect, - bool separate_stencil) + bool separate_ds) { const struct blit_ops *ops = &r2d_ops; trace_start_sysmem_resolve(&cmd->trace, cs); - ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, - 0, false, dst->ubwc_enabled, VK_SAMPLE_COUNT_1_BIT); + ops->setup(cmd, cs, tu_vk_format_to_pipe_format(format), + VK_IMAGE_ASPECT_COLOR_BIT, 0, false, dst->view.ubwc_enabled, + VK_SAMPLE_COUNT_1_BIT); ops->coords(cs, &rect->offset, &rect->offset, &rect->extent); for_each_layer(i, layer_mask, layers) { - if (separate_stencil) { - r2d_src_stencil(cmd, cs, src, i, VK_FILTER_NEAREST); - r2d_dst_stencil(cs, dst, i); + if (separate_ds) { + if (format == VK_FORMAT_D32_SFLOAT) { + r2d_src_depth(cmd, cs, src, i, VK_FILTER_NEAREST); + ops->dst_depth(cs, dst, i); + } else { + r2d_src_stencil(cmd, cs, src, i, VK_FILTER_NEAREST); + ops->dst_stencil(cs, dst, i); + } } else { - ops->src(cmd, cs, src, i, VK_FILTER_NEAREST); - ops->dst(cs, dst, i); + ops->src(cmd, cs, &src->view, i, VK_FILTER_NEAREST); + ops->dst(cs, &dst->view, i); } ops->run(cmd, cs); } @@ -2079,7 +2134,7 @@ tu_resolve_sysmem(struct tu_cmd_buffer *cmd, if (dst->image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) { resolve_sysmem(cmd, cs, VK_FORMAT_D32_SFLOAT, - src, dst, layer_mask, layers, rect, false); + src, dst, layer_mask, layers, rect, true); resolve_sysmem(cmd, cs, VK_FORMAT_S8_UINT, src, dst, layer_mask, layers, rect, true); } else { @@ -2098,9 +2153,14 @@ clear_image(struct tu_cmd_buffer *cmd, uint32_t level_count = tu_get_levelCount(image, range); uint32_t layer_count = tu_get_layerCount(image, range); struct 
tu_cs *cs = &cmd->cs; - VkFormat format = image->vk_format; - if (format == VK_FORMAT_D32_SFLOAT_S8_UINT || format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) - format = copy_format(format, aspect_mask, false); + enum pipe_format format; + if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) { + format = PIPE_FORMAT_R32_UINT; + } else { + format = tu6_plane_format(image->vk_format, + tu6_plane_index(image->vk_format, + aspect_mask)); + } if (image->layout[0].depth0 > 1) { assert(layer_count == 1); @@ -2112,7 +2172,7 @@ clear_image(struct tu_cmd_buffer *cmd, ops->setup(cmd, cs, format, aspect_mask, 0, true, image->layout[0].ubwc, image->layout[0].nr_samples); if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) - ops->clear_value(cs, VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, clear_value); + ops->clear_value(cs, PIPE_FORMAT_R9G9B9E5_FLOAT, clear_value); else ops->clear_value(cs, format, clear_value); @@ -2125,7 +2185,7 @@ clear_image(struct tu_cmd_buffer *cmd, u_minify(image->layout[0].height0, range->baseMipLevel + j) }); - struct tu_image_view dst; + struct fdl6_view dst; tu_image_view_copy_blit(&dst, image, format, &(VkImageSubresourceLayers) { .aspectMask = aspect_mask, .mipLevel = range->baseMipLevel + j, @@ -2338,21 +2398,21 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, } static void -pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t clear_value[4]) +pack_gmem_clear_value(const VkClearValue *val, enum pipe_format format, uint32_t clear_value[4]) { switch (format) { - case VK_FORMAT_X8_D24_UNORM_PACK32: - case VK_FORMAT_D24_UNORM_S8_UINT: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24) | val->depthStencil.stencil << 24; return; - case VK_FORMAT_D16_UNORM: + case PIPE_FORMAT_Z16_UNORM: clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 16); return; - case VK_FORMAT_D32_SFLOAT: + case PIPE_FORMAT_Z32_FLOAT: clear_value[0] = 
fui(val->depthStencil.depth); return; - case VK_FORMAT_S8_UINT: + case PIPE_FORMAT_S8_UINT: clear_value[0] = val->depthStencil.stencil; return; default: @@ -2361,33 +2421,33 @@ pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t clear_v float tmp[4]; memcpy(tmp, val->color.float32, 4 * sizeof(float)); - if (vk_format_is_srgb(format)) { + if (util_format_is_srgb(format)) { for (int i = 0; i < 3; i++) tmp[i] = util_format_linear_to_srgb_float(tmp[i]); } #define PACK_F(type) util_format_##type##_pack_rgba_float \ ( (uint8_t*) &clear_value[0], 0, tmp, 0, 1, 1) - switch (vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) { + switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) { case 4: PACK_F(r4g4b4a4_unorm); break; case 5: - if (vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_Y) == 6) + if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_Y) == 6) PACK_F(r5g6b5_unorm); else PACK_F(r5g5b5a1_unorm); break; case 8: - if (vk_format_is_snorm(format)) + if (util_format_is_snorm(format)) PACK_F(r8g8b8a8_snorm); - else if (vk_format_is_unorm(format)) + else if (util_format_is_unorm(format)) PACK_F(r8g8b8a8_unorm); else pack_int8(clear_value, val->color.uint32); break; case 10: - if (vk_format_is_int(format)) + if (util_format_is_pure_integer(format)) pack_int10_2(clear_value, val->color.uint32); else PACK_F(r10g10b10a2_unorm); @@ -2396,11 +2456,11 @@ pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t clear_v clear_value[0] = float3_to_r11g11b10f(val->color.float32); break; case 16: - if (vk_format_is_snorm(format)) + if (util_format_is_snorm(format)) PACK_F(r16g16b16a16_snorm); - else if (vk_format_is_unorm(format)) + else if (util_format_is_unorm(format)) PACK_F(r16g16b16a16_unorm); - else if (vk_format_is_float(format)) + else if (util_format_is_float(format)) PACK_F(r16g16b16a16_float); else 
pack_int16(clear_value, val->color.uint32); @@ -2417,7 +2477,7 @@ pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t clear_v static void clear_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - VkFormat format, + enum pipe_format format, uint8_t clear_mask, uint32_t gmem_offset, const VkClearValue *value) @@ -2454,15 +2514,16 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd, trace_start_gmem_clear(&cmd->trace, cs); + enum pipe_format format = tu_vk_format_to_pipe_format(att->format); if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { if (mask & VK_IMAGE_ASPECT_DEPTH_BIT) - clear_gmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, 0xf, att->gmem_offset, value); + clear_gmem_attachment(cmd, cs, PIPE_FORMAT_Z32_FLOAT, 0xf, att->gmem_offset, value); if (mask & VK_IMAGE_ASPECT_STENCIL_BIT) - clear_gmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, 0xf, att->gmem_offset_stencil, value); + clear_gmem_attachment(cmd, cs, PIPE_FORMAT_S8_UINT, 0xf, att->gmem_offset_stencil, value); return; } - clear_gmem_attachment(cmd, cs, att->format, aspect_write_mask(att->format, mask), att->gmem_offset, value); + clear_gmem_attachment(cmd, cs, format, aspect_write_mask(format, mask), att->gmem_offset, value); trace_end_gmem_clear(&cmd->trace, cs, att->format, att->samples); } @@ -2554,12 +2615,13 @@ tu_CmdClearAttachments(VkCommandBuffer commandBuffer, static void clear_sysmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - VkFormat format, + VkFormat vk_format, VkImageAspectFlags clear_mask, const VkRenderPassBeginInfo *info, uint32_t a, - bool separate_stencil) + bool separate_ds) { + enum pipe_format format = tu_vk_format_to_pipe_format(vk_format); const struct tu_framebuffer *fb = cmd->state.framebuffer; const struct tu_image_view *iview = cmd->state.attachments[a]; const uint32_t clear_views = cmd->state.pass->attachments[a].clear_views; @@ -2569,19 +2631,20 @@ clear_sysmem_attachment(struct tu_cmd_buffer *cmd, 
trace_start_sysmem_clear(&cmd->trace, cs); - ops->setup(cmd, cs, format, clear_mask, 0, true, iview->ubwc_enabled, + ops->setup(cmd, cs, format, clear_mask, 0, true, iview->view.ubwc_enabled, cmd->state.pass->attachments[a].samples); ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent); ops->clear_value(cs, format, &info->pClearValues[a]); for_each_layer(i, clear_views, fb->layers) { - if (separate_stencil) { - if (ops == &r3d_ops) - r3d_dst_stencil(cs, iview, i); - else - r2d_dst_stencil(cs, iview, i); + if (separate_ds) { + if (vk_format == VK_FORMAT_D32_SFLOAT) { + ops->dst_depth(cs, iview, i); + } else { + ops->dst_stencil(cs, iview, i); + } } else { - ops->dst(cs, iview, i); + ops->dst(cs, &iview->view, i); } ops->run(cmd, cs); } @@ -2589,7 +2652,7 @@ clear_sysmem_attachment(struct tu_cmd_buffer *cmd, ops->teardown(cmd, cs); trace_end_sysmem_clear(&cmd->trace, cs, - format, ops == &r3d_ops, + vk_format, ops == &r3d_ops, cmd->state.pass->attachments[a].samples); } @@ -2608,7 +2671,7 @@ tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd, if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { if (attachment->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT) { clear_sysmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT, - info, a, false); + info, a, true); } if (attachment->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT) { clear_sysmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, VK_IMAGE_ASPECT_COLOR_BIT, @@ -2630,6 +2693,7 @@ tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd, */ if (vk_format_is_depth_or_stencil(attachment->format)) { tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); + tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS); tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH); } else { tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); @@ -2672,23 +2736,35 @@ tu_emit_blit(struct tu_cmd_buffer *cmd, tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO( .unk0 = !resolve, .gmem = !resolve, - .sample_0 = 
vk_format_is_int(attachment->format) | + .sample_0 = vk_format_is_int(attachment->format) || vk_format_is_depth_or_stencil(attachment->format))); tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4); - if (separate_stencil) { - tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS); - tu_cs_emit_qw(cs, iview->stencil_base_addr); - tu_cs_emit(cs, iview->stencil_PITCH); + if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { + if (!separate_stencil) { + tu_cs_emit(cs, tu_image_view_depth(iview, RB_BLIT_DST_INFO)); + tu_cs_emit_qw(cs, iview->depth_base_addr); + tu_cs_emit(cs, iview->depth_PITCH); - tu_cs_emit_regs(cs, - A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset_stencil)); + tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3); + tu_cs_image_flag_ref(cs, &iview->view, 0); + + tu_cs_emit_regs(cs, + A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset)); + } else { + tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS); + tu_cs_emit_qw(cs, iview->stencil_base_addr); + tu_cs_emit(cs, iview->stencil_PITCH); + + tu_cs_emit_regs(cs, + A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset_stencil)); + } } else { - tu_cs_emit(cs, iview->RB_BLIT_DST_INFO); - tu_cs_image_ref_2d(cs, iview, 0, false); + tu_cs_emit(cs, iview->view.RB_BLIT_DST_INFO); + tu_cs_image_ref_2d(cs, &iview->view, 0, false); tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3); - tu_cs_image_flag_ref(cs, iview, 0); + tu_cs_image_flag_ref(cs, &iview->view, 0); tu_cs_emit_regs(cs, A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset)); @@ -2759,25 +2835,31 @@ store_cp_blit(struct tu_cmd_buffer *cmd, const struct tu_image_view *iview, uint32_t samples, bool separate_stencil, - VkFormat format, + enum pipe_format format, uint32_t gmem_offset, uint32_t cpp) { r2d_setup_common(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, - iview->ubwc_enabled, true); - if (separate_stencil) - r2d_dst_stencil(cs, iview, 0); - else - r2d_dst(cs, iview, 0); + 
iview->view.ubwc_enabled, true); + + if (iview->image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) { + if (!separate_stencil) { + r2d_dst_depth(cs, iview, 0); + } else { + r2d_dst_stencil(cs, iview, 0); + } + } else { + r2d_dst(cs, &iview->view, 0); + } tu_cs_emit_regs(cs, A6XX_SP_PS_2D_SRC_INFO( .color_format = tu6_format_texture(format, TILE6_2).fmt, .tile_mode = TILE6_2, - .srgb = vk_format_is_srgb(format), + .srgb = util_format_is_srgb(format), .samples = tu_msaa_samples(samples), - .samples_average = !vk_format_is_int(format) && - !vk_format_is_depth_or_stencil(format), + .samples_average = !util_format_is_pure_integer(format) && + !util_format_is_depth_or_stencil(format), .unk20 = 1, .unk22 = 1), /* note: src size does not matter when not scaling */ @@ -2807,26 +2889,45 @@ store_3d_blit(struct tu_cmd_buffer *cmd, const struct tu_image_view *iview, uint32_t dst_samples, bool separate_stencil, - VkFormat format, + enum pipe_format format, const VkRect2D *render_area, uint32_t gmem_offset, uint32_t cpp) { + /* RB_BIN_CONTROL/GRAS_BIN_CONTROL are normally only set once and they + * aren't set until we know whether we're HW binning or not, and we want to + * avoid a dependence on that here to be able to store attachments before + * the end of the renderpass in the future. Use the scratch space to + * save/restore them dynamically. 
+ */ + tu_cs_emit_pkt7(cs, CP_REG_TO_SCRATCH, 1); + tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(REG_A6XX_RB_BIN_CONTROL) | + CP_REG_TO_SCRATCH_0_SCRATCH(0) | + CP_REG_TO_SCRATCH_0_CNT(1 - 1)); + r3d_setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, - iview->ubwc_enabled, dst_samples); + iview->view.ubwc_enabled, dst_samples); r3d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent); - if (separate_stencil) - r3d_dst_stencil(cs, iview, 0); - else - r3d_dst(cs, iview, 0); + if (iview->image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) { + if (!separate_stencil) { + r3d_dst_depth(cs, iview, 0); + } else { + r3d_dst_stencil(cs, iview, 0); + } + } else { + r3d_dst(cs, &iview->view, 0); + } r3d_src_gmem(cmd, cs, iview, format, gmem_offset, cpp); /* sync GMEM writes with CACHE. */ tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); + /* Wait for CACHE_INVALIDATE to land */ + tu_cs_emit_wfi(cs); + r3d_run(cmd, cs); /* Draws write to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to @@ -2835,6 +2936,17 @@ store_3d_blit(struct tu_cmd_buffer *cmd, * writes to depth images as a color RT, so there's no need to flush depth. */ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); + + /* Restore RB_BIN_CONTROL/GRAS_BIN_CONTROL saved above. 
*/ + tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1); + tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_RB_BIN_CONTROL) | + CP_SCRATCH_TO_REG_0_SCRATCH(0) | + CP_SCRATCH_TO_REG_0_CNT(1 - 1)); + + tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1); + tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_GRAS_BIN_CONTROL) | + CP_SCRATCH_TO_REG_0_SCRATCH(0) | + CP_SCRATCH_TO_REG_0_CNT(1 - 1)); } void @@ -2862,13 +2974,17 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, * required y padding in the layout (except for the last level) */ bool need_y2_align = - y2 != iview->extent.height || iview->need_y2_align; + y2 != iview->view.height || iview->view.need_y2_align; bool unaligned = x1 % phys_dev->info->gmem_align_w || - (x2 % phys_dev->info->gmem_align_w && x2 != iview->extent.width) || + (x2 % phys_dev->info->gmem_align_w && x2 != iview->view.width) || y1 % phys_dev->info->gmem_align_h || (y2 % phys_dev->info->gmem_align_h && need_y2_align); + /* Unaligned store is incredibly rare in CTS, we have to force it to test. */ + if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_UNALIGNED_STORE)) + unaligned = true; + /* D32_SFLOAT_S8_UINT is quite special format: it has two planes, * one for depth and other for stencil. When resolving a MSAA * D32_SFLOAT_S8_UINT to S8_UINT, we need to take that into account. 
@@ -2877,22 +2993,25 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, src->format == VK_FORMAT_D32_SFLOAT_S8_UINT && dst->format == VK_FORMAT_S8_UINT; + bool store_common = dst->store && !resolve_d32s8_s8; + bool store_separate_stencil = dst->store_stencil || resolve_d32s8_s8; + trace_start_gmem_store(&cmd->trace, cs); /* use fast path when render area is aligned, except for unsupported resolve cases */ if (!unaligned && (a == gmem_a || blit_can_resolve(dst->format))) { - if (dst->store) - tu_emit_blit(cmd, cs, iview, src, true, resolve_d32s8_s8); - if (dst->store_stencil) + if (store_common) + tu_emit_blit(cmd, cs, iview, src, true, false); + if (store_separate_stencil) tu_emit_blit(cmd, cs, iview, src, true, true); trace_end_gmem_store(&cmd->trace, cs, dst->format, true, false); return; } - VkFormat format = src->format; - if (format == VK_FORMAT_D32_SFLOAT_S8_UINT) - format = VK_FORMAT_D32_SFLOAT; + enum pipe_format format = tu_vk_format_to_pipe_format(src->format); + if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + format = PIPE_FORMAT_Z32_FLOAT; if (dst->samples > 1) { /* If we hit this path, we have to disable draw states after every tile @@ -2902,26 +3021,26 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, * TODO: store a flag somewhere so we don't do this more than once and * don't do it after the renderpass when this happens. 
*/ - if (dst->store || dst->store_stencil) + if (store_common || store_separate_stencil) tu_disable_draw_states(cmd, cs); - if (dst->store) { - store_3d_blit(cmd, cs, iview, dst->samples, resolve_d32s8_s8, format, + if (store_common) { + store_3d_blit(cmd, cs, iview, dst->samples, false, format, render_area, src->gmem_offset, src->cpp); } - if (dst->store_stencil) { - store_3d_blit(cmd, cs, iview, dst->samples, true, VK_FORMAT_S8_UINT, - render_area, src->gmem_offset, src->samples); + if (store_separate_stencil) { + store_3d_blit(cmd, cs, iview, dst->samples, true, PIPE_FORMAT_S8_UINT, + render_area, src->gmem_offset_stencil, src->samples); } } else { r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent); - if (dst->store) { - store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format, + if (store_common) { + store_cp_blit(cmd, cs, iview, src->samples, false, format, src->gmem_offset, src->cpp); } - if (dst->store_stencil) { - store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT, + if (store_separate_stencil) { + store_cp_blit(cmd, cs, iview, src->samples, true, PIPE_FORMAT_S8_UINT, src->gmem_offset_stencil, src->samples); } } diff --git a/lib/mesa/src/freedreno/vulkan/tu_legacy.c b/lib/mesa/src/freedreno/vulkan/tu_legacy.c deleted file mode 100644 index 8209a96b0..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_legacy.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright 2020 Valve Corporation - * SPDX-License-Identifier: MIT - * - * Authors: - * Jonathan Marek <jonathan@marek.ca> - */ - -#include <vulkan/vulkan.h> -#include <vulkan/vk_android_native_buffer.h> /* android tu_entrypoints.h depends on this */ -#include <assert.h> - -#include "tu_entrypoints.h" -#include "vk_util.h" - -void -tu_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice pdev, - uint32_t *count, - VkQueueFamilyProperties *props) -{ - if (!props) - return tu_GetPhysicalDeviceQueueFamilyProperties2(pdev, count, NULL); - - 
VkQueueFamilyProperties2 props2[*count]; - for (uint32_t i = 0; i < *count; i++) { - props2[i].sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2; - props2[i].pNext = NULL; - } - tu_GetPhysicalDeviceQueueFamilyProperties2(pdev, count, props2); - for (uint32_t i = 0; i < *count; i++) - props[i] = props2[i].queueFamilyProperties; -} - -void -tu_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice pdev, - VkFormat format, - VkImageType type, - VkSampleCountFlagBits samples, - VkImageUsageFlags usage, - VkImageTiling tiling, - uint32_t *count, - VkSparseImageFormatProperties *props) -{ - const VkPhysicalDeviceSparseImageFormatInfo2 info = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, - .format = format, - .type = type, - .samples = samples, - .usage = usage, - .tiling = tiling, - }; - - if (!props) - return tu_GetPhysicalDeviceSparseImageFormatProperties2(pdev, &info, count, NULL); - - VkSparseImageFormatProperties2 props2[*count]; - for (uint32_t i = 0; i < *count; i++) { - props2[i].sType = VK_STRUCTURE_TYPE_SPARSE_IMAGE_FORMAT_PROPERTIES_2; - props2[i].pNext = NULL; - } - tu_GetPhysicalDeviceSparseImageFormatProperties2(pdev, &info, count, props2); - for (uint32_t i = 0; i < *count; i++) - props[i] = props2[i].properties; -} - -void -tu_GetImageSparseMemoryRequirements(VkDevice device, - VkImage image, - uint32_t *count, - VkSparseImageMemoryRequirements *reqs) -{ - const VkImageSparseMemoryRequirementsInfo2 info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_SPARSE_MEMORY_REQUIREMENTS_INFO_2, - .image = image - }; - - if (!reqs) - return tu_GetImageSparseMemoryRequirements2(device, &info, count, NULL); - - VkSparseImageMemoryRequirements2 reqs2[*count]; - for (uint32_t i = 0; i < *count; i++) { - reqs2[i].sType = VK_STRUCTURE_TYPE_SPARSE_IMAGE_MEMORY_REQUIREMENTS_2; - reqs2[i].pNext = NULL; - } - tu_GetImageSparseMemoryRequirements2(device, &info, count, reqs2); - for (uint32_t i = 0; i < *count; i++) - reqs[i] = reqs2[i].memoryRequirements; 
-} diff --git a/lib/mesa/src/freedreno/vulkan/tu_wsi_display.c b/lib/mesa/src/freedreno/vulkan/tu_wsi_display.c deleted file mode 100644 index 9a9696d93..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_wsi_display.c +++ /dev/null @@ -1,339 +0,0 @@ -/* - * Copyright © 2017 Keith Packard - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that copyright - * notice and this permission notice appear in supporting documentation, and - * that the name of the copyright holders not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. The copyright holders make no representations - * about the suitability of this software for any purpose. It is provided "as - * is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THIS SOFTWARE. 
- */ - -#include <stdbool.h> -#include <string.h> -#include <unistd.h> -#include <fcntl.h> -#include "tu_private.h" -#include "tu_cs.h" -#include "util/disk_cache.h" -#include "util/strtod.h" -#include "vk_util.h" -#include <xf86drm.h> -#include <xf86drmMode.h> -#include "vk_format.h" -#include "util/debug.h" -#include "wsi_common_display.h" - -VkResult -tu_GetPhysicalDeviceDisplayPropertiesKHR(VkPhysicalDevice physical_device, - uint32_t *property_count, - VkDisplayPropertiesKHR *properties) -{ - TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device); - - return wsi_display_get_physical_device_display_properties( - physical_device, - &pdevice->wsi_device, - property_count, - properties); -} - -VkResult -tu_GetPhysicalDeviceDisplayProperties2KHR(VkPhysicalDevice physical_device, - uint32_t *property_count, - VkDisplayProperties2KHR *properties) -{ - TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device); - - return wsi_display_get_physical_device_display_properties2( - physical_device, - &pdevice->wsi_device, - property_count, - properties); -} - -VkResult -tu_GetPhysicalDeviceDisplayPlanePropertiesKHR( - VkPhysicalDevice physical_device, - uint32_t *property_count, - VkDisplayPlanePropertiesKHR *properties) -{ - TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device); - - return wsi_display_get_physical_device_display_plane_properties( - physical_device, - &pdevice->wsi_device, - property_count, - properties); -} - -VkResult -tu_GetPhysicalDeviceDisplayPlaneProperties2KHR( - VkPhysicalDevice physical_device, - uint32_t *property_count, - VkDisplayPlaneProperties2KHR *properties) -{ - TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device); - - return wsi_display_get_physical_device_display_plane_properties2( - physical_device, - &pdevice->wsi_device, - property_count, - properties); -} - -VkResult -tu_GetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physical_device, - uint32_t plane_index, - uint32_t *display_count, - VkDisplayKHR 
*displays) -{ - TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device); - - return wsi_display_get_display_plane_supported_displays( - physical_device, - &pdevice->wsi_device, - plane_index, - display_count, - displays); -} - - -VkResult -tu_GetDisplayModePropertiesKHR(VkPhysicalDevice physical_device, - VkDisplayKHR display, - uint32_t *property_count, - VkDisplayModePropertiesKHR *properties) -{ - TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device); - - return wsi_display_get_display_mode_properties(physical_device, - &pdevice->wsi_device, - display, - property_count, - properties); -} - -VkResult -tu_GetDisplayModeProperties2KHR(VkPhysicalDevice physical_device, - VkDisplayKHR display, - uint32_t *property_count, - VkDisplayModeProperties2KHR *properties) -{ - TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device); - - return wsi_display_get_display_mode_properties2(physical_device, - &pdevice->wsi_device, - display, - property_count, - properties); -} - -VkResult -tu_CreateDisplayModeKHR(VkPhysicalDevice physical_device, - VkDisplayKHR display, - const VkDisplayModeCreateInfoKHR *create_info, - const VkAllocationCallbacks *allocator, - VkDisplayModeKHR *mode) -{ - TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device); - - return wsi_display_create_display_mode(physical_device, - &pdevice->wsi_device, - display, - create_info, - allocator, - mode); -} - -VkResult -tu_GetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physical_device, - VkDisplayModeKHR mode_khr, - uint32_t plane_index, - VkDisplayPlaneCapabilitiesKHR *capabilities) -{ - TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device); - - return wsi_get_display_plane_capabilities(physical_device, - &pdevice->wsi_device, - mode_khr, - plane_index, - capabilities); -} - -VkResult -tu_GetDisplayPlaneCapabilities2KHR(VkPhysicalDevice physical_device, - const VkDisplayPlaneInfo2KHR *pDisplayPlaneInfo, - VkDisplayPlaneCapabilities2KHR *capabilities) -{ - 
TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device); - - return wsi_get_display_plane_capabilities2(physical_device, - &pdevice->wsi_device, - pDisplayPlaneInfo, - capabilities); -} - -VkResult -tu_CreateDisplayPlaneSurfaceKHR( - VkInstance _instance, - const VkDisplaySurfaceCreateInfoKHR *create_info, - const VkAllocationCallbacks *allocator, - VkSurfaceKHR *surface) -{ - TU_FROM_HANDLE(tu_instance, instance, _instance); - const VkAllocationCallbacks *alloc; - - if (allocator) - alloc = allocator; - else - alloc = &instance->alloc; - - return wsi_create_display_surface(_instance, alloc, - create_info, surface); -} - -VkResult -tu_ReleaseDisplayEXT(VkPhysicalDevice physical_device, - VkDisplayKHR display) -{ - TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device); - - return wsi_release_display(physical_device, - &pdevice->wsi_device, - display); -} - -#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT -VkResult -tu_AcquireXlibDisplayEXT(VkPhysicalDevice physical_device, - Display *dpy, - VkDisplayKHR display) -{ - TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device); - - return wsi_acquire_xlib_display(physical_device, - &pdevice->wsi_device, - dpy, - display); -} - -VkResult -tu_GetRandROutputDisplayEXT(VkPhysicalDevice physical_device, - Display *dpy, - RROutput output, - VkDisplayKHR *display) -{ - TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device); - - return wsi_get_randr_output_display(physical_device, - &pdevice->wsi_device, - dpy, - output, - display); -} -#endif /* VK_USE_PLATFORM_XLIB_XRANDR_EXT */ - -/* VK_EXT_display_control */ - -VkResult -tu_DisplayPowerControlEXT(VkDevice _device, - VkDisplayKHR display, - const VkDisplayPowerInfoEXT *display_power_info) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - return wsi_display_power_control(_device, - &device->physical_device->wsi_device, - display, - display_power_info); -} - -VkResult -tu_RegisterDeviceEventEXT(VkDevice _device, - const VkDeviceEventInfoEXT *device_event_info, 
- const VkAllocationCallbacks *allocator, - VkFence *_fence) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_fence *fence; - VkResult ret; - - fence = vk_alloc2(&device->instance->alloc, allocator, sizeof (*fence), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!fence) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - tu_fence_init(fence, false); - - ret = wsi_register_device_event(_device, - &device->physical_device->wsi_device, - device_event_info, - allocator, - &fence->fence_wsi); - if (ret == VK_SUCCESS) - *_fence = tu_fence_to_handle(fence); - else - vk_free2(&device->instance->alloc, allocator, fence); - return ret; -} - -VkResult -tu_RegisterDisplayEventEXT(VkDevice _device, - VkDisplayKHR display, - const VkDisplayEventInfoEXT *display_event_info, - const VkAllocationCallbacks *allocator, - VkFence *_fence) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - struct tu_fence *fence; - VkResult ret; - - fence = vk_alloc2(&device->instance->alloc, allocator, sizeof (*fence), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!fence) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - tu_fence_init(fence, false); - - ret = wsi_register_display_event(_device, - &device->physical_device->wsi_device, - display, - display_event_info, - allocator, - &fence->fence_wsi); - - if (ret == VK_SUCCESS) - *_fence = tu_fence_to_handle(fence); - else - vk_free2(&device->instance->alloc, allocator, fence); - return ret; -} - -VkResult -tu_GetSwapchainCounterEXT(VkDevice _device, - VkSwapchainKHR swapchain, - VkSurfaceCounterFlagBitsEXT flag_bits, - uint64_t *value) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - return wsi_get_swapchain_counter(_device, - &device->physical_device->wsi_device, - swapchain, - flag_bits, - value); -} - |