author     Jonathan Gray <jsg@cvs.openbsd.org>    2022-09-02 05:47:02 +0000
committer  Jonathan Gray <jsg@cvs.openbsd.org>    2022-09-02 05:47:02 +0000
commit     0dbbf1e0708df85a357d70e2708c0a11aeb5480e
tree       6656ff8eb8b15a2fc1c02888973caf618388cfd0  /lib/mesa/src/freedreno
parent     5f66494d31f735486b8222ecfa0a0c9046e92543
Merge Mesa 22.1.7
Diffstat (limited to 'lib/mesa/src/freedreno')
-rw-r--r--  lib/mesa/src/freedreno/afuc/Makefile                    368
-rw-r--r--  lib/mesa/src/freedreno/computerator/a6xx.c               33
-rw-r--r--  lib/mesa/src/freedreno/computerator/ir3_asm.c             5
-rw-r--r--  lib/mesa/src/freedreno/computerator/main.c                6
-rw-r--r--  lib/mesa/src/freedreno/drm/msm_bo.c                     197
-rw-r--r--  lib/mesa/src/freedreno/drm/msm_device.c                  61
-rw-r--r--  lib/mesa/src/freedreno/drm/msm_pipe.c                   220
-rw-r--r--  lib/mesa/src/freedreno/drm/msm_priv.h                   140
-rw-r--r--  lib/mesa/src/freedreno/drm/msm_ringbuffer.c             722
-rw-r--r--  lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.c          568
-rw-r--r--  lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.h          140
-rw-r--r--  lib/mesa/src/freedreno/fdl/fd6_layout_test.c            237
-rw-r--r--  lib/mesa/src/freedreno/ir3/ir3_dce.c                      6
-rw-r--r--  lib/mesa/src/freedreno/ir3/ir3_delay.c                  223
-rw-r--r--  lib/mesa/src/freedreno/ir3/ir3_lexer.l                   60
-rw-r--r--  lib/mesa/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c   138
-rw-r--r--  lib/mesa/src/freedreno/ir3/ir3_parser.y                 170
-rw-r--r--  lib/mesa/src/freedreno/ir3/ir3_postsched.c              244
-rw-r--r--  lib/mesa/src/freedreno/ir3/ir3_ra.h                       2
-rw-r--r--  lib/mesa/src/freedreno/ir3/tests/disasm.c               368
-rw-r--r--  lib/mesa/src/freedreno/registers/dsi/dsi_phy_5nm.xml    228
-rw-r--r--  lib/mesa/src/freedreno/vulkan/tu_clear_blit.c           919
-rw-r--r--  lib/mesa/src/freedreno/vulkan/tu_legacy.c                88
-rw-r--r--  lib/mesa/src/freedreno/vulkan/tu_wsi_display.c          339
24 files changed, 1384 insertions, 4098 deletions
diff --git a/lib/mesa/src/freedreno/afuc/Makefile b/lib/mesa/src/freedreno/afuc/Makefile
deleted file mode 100644
index 12e6f3aeb..000000000
--- a/lib/mesa/src/freedreno/afuc/Makefile
+++ /dev/null
@@ -1,368 +0,0 @@
-# CMAKE generated file: DO NOT EDIT!
-# Generated by "Unix Makefiles" Generator, CMake Version 3.17
-
-# Default target executed when no arguments are given to make.
-default_target: all
-
-.PHONY : default_target
-
-# Allow only one "make -f Makefile2" at a time, but pass parallelism.
-.NOTPARALLEL:
-
-
-#=============================================================================
-# Special targets provided by cmake.
-
-# Disable implicit rules so canonical targets will work.
-.SUFFIXES:
-
-
-# Disable VCS-based implicit rules.
-% : %,v
-
-
-# Disable VCS-based implicit rules.
-% : RCS/%
-
-
-# Disable VCS-based implicit rules.
-% : RCS/%,v
-
-
-# Disable VCS-based implicit rules.
-% : SCCS/s.%
-
-
-# Disable VCS-based implicit rules.
-% : s.%
-
-
-.SUFFIXES: .hpux_make_needs_suffix_list
-
-
-# Command-line flag to silence nested $(MAKE).
-$(VERBOSE)MAKESILENT = -s
-
-# Suppress display of executed commands.
-$(VERBOSE).SILENT:
-
-
-# A target that is always out of date.
-cmake_force:
-
-.PHONY : cmake_force
-
-#=============================================================================
-# Set environment variables for the build.
-
-# The shell in which to execute make rules.
-SHELL = /bin/sh
-
-# The CMake executable.
-CMAKE_COMMAND = /usr/bin/cmake
-
-# The command to remove a file.
-RM = /usr/bin/cmake -E rm -f
-
-# Escaping for special characters.
-EQUALS = =
-
-# The top-level source directory on which CMake was run.
-CMAKE_SOURCE_DIR = /home/robclark/src/envytools
-
-# The top-level build directory on which CMake was run.
-CMAKE_BINARY_DIR = /home/robclark/src/envytools
-
-#=============================================================================
-# Targets provided globally by CMake.
-
-# Special rule for the target install/strip
-install/strip: preinstall
- @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..."
- /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake
-.PHONY : install/strip
-
-# Special rule for the target install/strip
-install/strip/fast: preinstall/fast
- @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..."
- /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake
-.PHONY : install/strip/fast
-
-# Special rule for the target install/local
-install/local: preinstall
- @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..."
- /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake
-.PHONY : install/local
-
-# Special rule for the target install/local
-install/local/fast: preinstall/fast
- @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..."
- /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake
-.PHONY : install/local/fast
-
-# Special rule for the target edit_cache
-edit_cache:
- @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake cache editor..."
- /usr/bin/ccmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
-.PHONY : edit_cache
-
-# Special rule for the target edit_cache
-edit_cache/fast: edit_cache
-
-.PHONY : edit_cache/fast
-
-# Special rule for the target test
-test:
- @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running tests..."
- /usr/bin/ctest --force-new-ctest-process $(ARGS)
-.PHONY : test
-
-# Special rule for the target test
-test/fast: test
-
-.PHONY : test/fast
-
-# Special rule for the target install
-install: preinstall
- @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..."
- /usr/bin/cmake -P cmake_install.cmake
-.PHONY : install
-
-# Special rule for the target install
-install/fast: preinstall/fast
- @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..."
- /usr/bin/cmake -P cmake_install.cmake
-.PHONY : install/fast
-
-# Special rule for the target list_install_components
-list_install_components:
- @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Available install components are: \"Unspecified\""
-.PHONY : list_install_components
-
-# Special rule for the target list_install_components
-list_install_components/fast: list_install_components
-
-.PHONY : list_install_components/fast
-
-# Special rule for the target rebuild_cache
-rebuild_cache:
- @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..."
- /usr/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
-.PHONY : rebuild_cache
-
-# Special rule for the target rebuild_cache
-rebuild_cache/fast: rebuild_cache
-
-.PHONY : rebuild_cache/fast
-
-# The main all target
-all: cmake_check_build_system
- cd /home/robclark/src/envytools && $(CMAKE_COMMAND) -E cmake_progress_start /home/robclark/src/envytools/CMakeFiles /home/robclark/src/envytools/afuc/CMakeFiles/progress.marks
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/all
- $(CMAKE_COMMAND) -E cmake_progress_start /home/robclark/src/envytools/CMakeFiles 0
-.PHONY : all
-
-# The main clean target
-clean:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/clean
-.PHONY : clean
-
-# The main clean target
-clean/fast: clean
-
-.PHONY : clean/fast
-
-# Prepare targets for installation.
-preinstall: all
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/preinstall
-.PHONY : preinstall
-
-# Prepare targets for installation.
-preinstall/fast:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/preinstall
-.PHONY : preinstall/fast
-
-# clear depends
-depend:
- cd /home/robclark/src/envytools && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
-.PHONY : depend
-
-# Convenience name for target.
-afuc/CMakeFiles/asm.dir/rule:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/CMakeFiles/asm.dir/rule
-.PHONY : afuc/CMakeFiles/asm.dir/rule
-
-# Convenience name for target.
-asm: afuc/CMakeFiles/asm.dir/rule
-
-.PHONY : asm
-
-# fast build rule for target.
-asm/fast:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/build
-.PHONY : asm/fast
-
-# Convenience name for target.
-afuc/CMakeFiles/disasm.dir/rule:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/CMakeFiles/disasm.dir/rule
-.PHONY : afuc/CMakeFiles/disasm.dir/rule
-
-# Convenience name for target.
-disasm: afuc/CMakeFiles/disasm.dir/rule
-
-.PHONY : disasm
-
-# fast build rule for target.
-disasm/fast:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/build
-.PHONY : disasm/fast
-
-asm.o: asm.c.o
-
-.PHONY : asm.o
-
-# target to build an object file
-asm.c.o:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/asm.c.o
-.PHONY : asm.c.o
-
-asm.i: asm.c.i
-
-.PHONY : asm.i
-
-# target to preprocess a source file
-asm.c.i:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/asm.c.i
-.PHONY : asm.c.i
-
-asm.s: asm.c.s
-
-.PHONY : asm.s
-
-# target to generate assembly for a file
-asm.c.s:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/asm.c.s
-.PHONY : asm.c.s
-
-disasm.o: disasm.c.o
-
-.PHONY : disasm.o
-
-# target to build an object file
-disasm.c.o:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/disasm.c.o
-.PHONY : disasm.c.o
-
-disasm.i: disasm.c.i
-
-.PHONY : disasm.i
-
-# target to preprocess a source file
-disasm.c.i:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/disasm.c.i
-.PHONY : disasm.c.i
-
-disasm.s: disasm.c.s
-
-.PHONY : disasm.s
-
-# target to generate assembly for a file
-disasm.c.s:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/disasm.c.s
-.PHONY : disasm.c.s
-
-lexer.o: lexer.c.o
-
-.PHONY : lexer.o
-
-# target to build an object file
-lexer.c.o:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/lexer.c.o
-.PHONY : lexer.c.o
-
-lexer.i: lexer.c.i
-
-.PHONY : lexer.i
-
-# target to preprocess a source file
-lexer.c.i:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/lexer.c.i
-.PHONY : lexer.c.i
-
-lexer.s: lexer.c.s
-
-.PHONY : lexer.s
-
-# target to generate assembly for a file
-lexer.c.s:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/lexer.c.s
-.PHONY : lexer.c.s
-
-parser.o: parser.c.o
-
-.PHONY : parser.o
-
-# target to build an object file
-parser.c.o:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/parser.c.o
-.PHONY : parser.c.o
-
-parser.i: parser.c.i
-
-.PHONY : parser.i
-
-# target to preprocess a source file
-parser.c.i:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/parser.c.i
-.PHONY : parser.c.i
-
-parser.s: parser.c.s
-
-.PHONY : parser.s
-
-# target to generate assembly for a file
-parser.c.s:
- cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/parser.c.s
-.PHONY : parser.c.s
-
-# Help Target
-help:
- @echo "The following are some of the valid targets for this Makefile:"
- @echo "... all (the default if no target is provided)"
- @echo "... clean"
- @echo "... depend"
- @echo "... edit_cache"
- @echo "... install"
- @echo "... install/local"
- @echo "... install/strip"
- @echo "... list_install_components"
- @echo "... rebuild_cache"
- @echo "... test"
- @echo "... asm"
- @echo "... disasm"
- @echo "... asm.o"
- @echo "... asm.i"
- @echo "... asm.s"
- @echo "... disasm.o"
- @echo "... disasm.i"
- @echo "... disasm.s"
- @echo "... lexer.o"
- @echo "... lexer.i"
- @echo "... lexer.s"
- @echo "... parser.o"
- @echo "... parser.i"
- @echo "... parser.s"
-.PHONY : help
-
-
-
-#=============================================================================
-# Special targets to cleanup operation of make.
-
-# Special rule to run CMake to check the build system integrity.
-# No rule that depends on this can have commands that come from listfiles
-# because they might be regenerated.
-cmake_check_build_system:
- cd /home/robclark/src/envytools && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
-.PHONY : cmake_check_build_system
-
diff --git a/lib/mesa/src/freedreno/computerator/a6xx.c b/lib/mesa/src/freedreno/computerator/a6xx.c
index 67104a6db..a0ce6f986 100644
--- a/lib/mesa/src/freedreno/computerator/a6xx.c
+++ b/lib/mesa/src/freedreno/computerator/a6xx.c
@@ -158,6 +158,12 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
OUT_RING(ring, 0x41);
+ if (a6xx_backend->info->a6xx.has_lpac) {
+ OUT_PKT4(ring, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, 1);
+ OUT_RING(ring, A6XX_HLSQ_CS_UNKNOWN_B9D0_SHARED_SIZE(1) |
+ A6XX_HLSQ_CS_UNKNOWN_B9D0_UNK6);
+ }
+
uint32_t local_invocation_id, work_group_id;
local_invocation_id =
ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
@@ -171,6 +177,16 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
OUT_RING(ring, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) |
A6XX_HLSQ_CS_CNTL_1_THREADSIZE(thrsz));
+ if (a6xx_backend->info->a6xx.has_lpac) {
+ OUT_PKT4(ring, REG_A6XX_SP_CS_CNTL_0, 2);
+ OUT_RING(ring, A6XX_SP_CS_CNTL_0_WGIDCONSTID(work_group_id) |
+ A6XX_SP_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) |
+ A6XX_SP_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) |
+ A6XX_SP_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
+ OUT_RING(ring, A6XX_SP_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) |
+ A6XX_SP_CS_CNTL_1_THREADSIZE(thrsz));
+ }
+
OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START, 2);
OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */
@@ -180,12 +196,14 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START, 2);
OUT_RELOC(ring, v->bo, 0, 0, 0);
+ uint32_t shader_preload_size =
+ MIN2(v->instrlen, a6xx_backend->info->a6xx.instr_cache_size);
OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3);
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) |
- CP_LOAD_STATE6_0_NUM_UNIT(v->instrlen));
+ CP_LOAD_STATE6_0_NUM_UNIT(shader_preload_size));
OUT_RELOC(ring, v->bo, 0, 0, 0);
if (v->pvtmem_size > 0) {
@@ -296,11 +314,11 @@ cs_ibo_emit(struct fd_ringbuffer *ring, struct fd_submit *submit,
unsigned width = sz & MASK(15);
unsigned height = sz >> 15;
- OUT_RING(state, A6XX_IBO_0_FMT(FMT6_32_UINT) | A6XX_IBO_0_TILE_MODE(0));
- OUT_RING(state, A6XX_IBO_1_WIDTH(width) | A6XX_IBO_1_HEIGHT(height));
- OUT_RING(state, A6XX_IBO_2_PITCH(0) | A6XX_IBO_2_UNK4 | A6XX_IBO_2_UNK31 |
- A6XX_IBO_2_TYPE(A6XX_TEX_1D));
- OUT_RING(state, A6XX_IBO_3_ARRAY_PITCH(0));
+ OUT_RING(state, A6XX_TEX_CONST_0_FMT(FMT6_32_UINT) | A6XX_TEX_CONST_0_TILE_MODE(0));
+ OUT_RING(state, A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height));
+ OUT_RING(state, A6XX_TEX_CONST_2_PITCH(0) | A6XX_TEX_CONST_2_BUFFER |
+ A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER));
+ OUT_RING(state, A6XX_TEX_CONST_3_ARRAY_PITCH(0));
OUT_RELOC(state, kernel->bufs[i], 0, 0, 0);
OUT_RING(state, 0x00000000);
OUT_RING(state, 0x00000000);
@@ -519,7 +537,8 @@ a6xx_init(struct fd_device *dev, const struct fd_dev_id *dev_id)
.read_perfcntrs = a6xx_read_perfcntrs,
};
- a6xx_backend->compiler = ir3_compiler_create(dev, dev_id, false);
+ a6xx_backend->compiler = ir3_compiler_create(dev, dev_id,
+ &(struct ir3_compiler_options){});
a6xx_backend->dev = dev;
a6xx_backend->info = fd_dev_info(dev_id);
diff --git a/lib/mesa/src/freedreno/computerator/ir3_asm.c b/lib/mesa/src/freedreno/computerator/ir3_asm.c
index b9c295adf..e0f3c9bc5 100644
--- a/lib/mesa/src/freedreno/computerator/ir3_asm.c
+++ b/lib/mesa/src/freedreno/computerator/ir3_asm.c
@@ -35,8 +35,6 @@ ir3_asm_assemble(struct ir3_compiler *c, FILE *in)
errx(-1, "assembler failed");
struct ir3_shader_variant *v = shader->variants;
- v->mergedregs = true;
-
kernel->v = v;
kernel->bin = v->bin;
@@ -55,6 +53,9 @@ ir3_asm_assemble(struct ir3_compiler *c, FILE *in)
memcpy(fd_bo_map(v->bo), kernel->bin, sz);
+ /* Always include shaders in kernel crash dumps. */
+ fd_bo_mark_for_dump(v->bo);
+
return kernel;
}
diff --git a/lib/mesa/src/freedreno/computerator/main.c b/lib/mesa/src/freedreno/computerator/main.c
index 0468380be..6c4f14534 100644
--- a/lib/mesa/src/freedreno/computerator/main.c
+++ b/lib/mesa/src/freedreno/computerator/main.c
@@ -236,11 +236,7 @@ main(int argc, char **argv)
}
}
- int fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
- if (fd < 0)
- err(1, "could not open drm device");
-
- struct fd_device *dev = fd_device_new(fd);
+ struct fd_device *dev = fd_device_open();
struct fd_pipe *pipe = fd_pipe_new(dev, FD_PIPE_3D);
const struct fd_dev_id *dev_id = fd_pipe_dev_id(pipe);
diff --git a/lib/mesa/src/freedreno/drm/msm_bo.c b/lib/mesa/src/freedreno/drm/msm_bo.c
deleted file mode 100644
index da2609903..000000000
--- a/lib/mesa/src/freedreno/drm/msm_bo.c
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors:
- * Rob Clark <robclark@freedesktop.org>
- */
-
-#include "msm_priv.h"
-
-static int bo_allocate(struct msm_bo *msm_bo)
-{
- struct fd_bo *bo = &msm_bo->base;
- if (!msm_bo->offset) {
- struct drm_msm_gem_info req = {
- .handle = bo->handle,
- .info = MSM_INFO_GET_OFFSET,
- };
- int ret;
-
- /* if the buffer is already backed by pages then this
- * doesn't actually do anything (other than giving us
- * the offset)
- */
- ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO,
- &req, sizeof(req));
- if (ret) {
- ERROR_MSG("alloc failed: %s", strerror(errno));
- return ret;
- }
-
- msm_bo->offset = req.value;
- }
-
- return 0;
-}
-
-static int msm_bo_offset(struct fd_bo *bo, uint64_t *offset)
-{
- struct msm_bo *msm_bo = to_msm_bo(bo);
- int ret = bo_allocate(msm_bo);
- if (ret)
- return ret;
- *offset = msm_bo->offset;
- return 0;
-}
-
-static int msm_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op)
-{
- struct drm_msm_gem_cpu_prep req = {
- .handle = bo->handle,
- .op = op,
- };
-
- get_abs_timeout(&req.timeout, 5000000000);
-
- return drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_PREP, &req, sizeof(req));
-}
-
-static void msm_bo_cpu_fini(struct fd_bo *bo)
-{
- struct drm_msm_gem_cpu_fini req = {
- .handle = bo->handle,
- };
-
- drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_FINI, &req, sizeof(req));
-}
-
-static int msm_bo_madvise(struct fd_bo *bo, int willneed)
-{
- struct drm_msm_gem_madvise req = {
- .handle = bo->handle,
- .madv = willneed ? MSM_MADV_WILLNEED : MSM_MADV_DONTNEED,
- };
- int ret;
-
- /* older kernels do not support this: */
- if (bo->dev->version < FD_VERSION_MADVISE)
- return willneed;
-
- ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_MADVISE, &req, sizeof(req));
- if (ret)
- return ret;
-
- return req.retained;
-}
-
-static uint64_t msm_bo_iova(struct fd_bo *bo)
-{
- struct drm_msm_gem_info req = {
- .handle = bo->handle,
- .info = MSM_INFO_GET_IOVA,
- };
- int ret;
-
- ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
- debug_assert(ret == 0);
-
- return req.value;
-}
-
-static void msm_bo_set_name(struct fd_bo *bo, const char *fmt, va_list ap)
-{
- struct drm_msm_gem_info req = {
- .handle = bo->handle,
- .info = MSM_INFO_SET_NAME,
- };
- char buf[32];
- int sz;
-
- if (bo->dev->version < FD_VERSION_SOFTPIN)
- return;
-
- sz = vsnprintf(buf, sizeof(buf), fmt, ap);
-
- req.value = VOID2U64(buf);
- req.len = MIN2(sz, sizeof(buf));
-
- drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
-}
-
-static void msm_bo_destroy(struct fd_bo *bo)
-{
- struct msm_bo *msm_bo = to_msm_bo(bo);
- free(msm_bo);
-}
-
-static const struct fd_bo_funcs funcs = {
- .offset = msm_bo_offset,
- .cpu_prep = msm_bo_cpu_prep,
- .cpu_fini = msm_bo_cpu_fini,
- .madvise = msm_bo_madvise,
- .iova = msm_bo_iova,
- .set_name = msm_bo_set_name,
- .destroy = msm_bo_destroy,
-};
-
-/* allocate a buffer handle: */
-int msm_bo_new_handle(struct fd_device *dev,
- uint32_t size, uint32_t flags, uint32_t *handle)
-{
- struct drm_msm_gem_new req = {
- .size = size,
- .flags = MSM_BO_WC, // TODO figure out proper flags..
- };
- int ret;
-
- if (flags & DRM_FREEDRENO_GEM_SCANOUT)
- req.flags |= MSM_BO_SCANOUT;
-
- if (flags & DRM_FREEDRENO_GEM_GPUREADONLY)
- req.flags |= MSM_BO_GPU_READONLY;
-
- ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW,
- &req, sizeof(req));
- if (ret)
- return ret;
-
- *handle = req.handle;
-
- return 0;
-}
-
-/* allocate a new buffer object */
-struct fd_bo * msm_bo_from_handle(struct fd_device *dev,
- uint32_t size, uint32_t handle)
-{
- struct msm_bo *msm_bo;
- struct fd_bo *bo;
-
- msm_bo = calloc(1, sizeof(*msm_bo));
- if (!msm_bo)
- return NULL;
-
- bo = &msm_bo->base;
- bo->funcs = &funcs;
-
- return bo;
-}
diff --git a/lib/mesa/src/freedreno/drm/msm_device.c b/lib/mesa/src/freedreno/drm/msm_device.c
deleted file mode 100644
index d391ef013..000000000
--- a/lib/mesa/src/freedreno/drm/msm_device.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors:
- * Rob Clark <robclark@freedesktop.org>
- */
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include "msm_priv.h"
-
-static void msm_device_destroy(struct fd_device *dev)
-{
- struct msm_device *msm_dev = to_msm_device(dev);
- free(msm_dev);
-}
-
-static const struct fd_device_funcs funcs = {
- .bo_new_handle = msm_bo_new_handle,
- .bo_from_handle = msm_bo_from_handle,
- .pipe_new = msm_pipe_new,
- .destroy = msm_device_destroy,
-};
-
-struct fd_device * msm_device_new(int fd)
-{
- struct msm_device *msm_dev;
- struct fd_device *dev;
-
- msm_dev = calloc(1, sizeof(*msm_dev));
- if (!msm_dev)
- return NULL;
-
- dev = &msm_dev->base;
- dev->funcs = &funcs;
-
- dev->bo_size = sizeof(struct msm_bo);
-
- return dev;
-}
diff --git a/lib/mesa/src/freedreno/drm/msm_pipe.c b/lib/mesa/src/freedreno/drm/msm_pipe.c
deleted file mode 100644
index 7d5b9fcd7..000000000
--- a/lib/mesa/src/freedreno/drm/msm_pipe.c
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors:
- * Rob Clark <robclark@freedesktop.org>
- */
-
-#include "util/slab.h"
-
-#include "msm_priv.h"
-
-static int query_param(struct fd_pipe *pipe, uint32_t param,
- uint64_t *value)
-{
- struct msm_pipe *msm_pipe = to_msm_pipe(pipe);
- struct drm_msm_param req = {
- .pipe = msm_pipe->pipe,
- .param = param,
- };
- int ret;
-
- ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_GET_PARAM,
- &req, sizeof(req));
- if (ret)
- return ret;
-
- *value = req.value;
-
- return 0;
-}
-
-static int msm_pipe_get_param(struct fd_pipe *pipe,
- enum fd_param_id param, uint64_t *value)
-{
- struct msm_pipe *msm_pipe = to_msm_pipe(pipe);
- switch(param) {
- case FD_DEVICE_ID: // XXX probably get rid of this..
- case FD_GPU_ID:
- *value = msm_pipe->gpu_id;
- return 0;
- case FD_GMEM_SIZE:
- *value = msm_pipe->gmem;
- return 0;
- case FD_CHIP_ID:
- *value = msm_pipe->chip_id;
- return 0;
- case FD_MAX_FREQ:
- return query_param(pipe, MSM_PARAM_MAX_FREQ, value);
- case FD_TIMESTAMP:
- return query_param(pipe, MSM_PARAM_TIMESTAMP, value);
- case FD_NR_RINGS:
- return query_param(pipe, MSM_PARAM_NR_RINGS, value);
- default:
- ERROR_MSG("invalid param id: %d", param);
- return -1;
- }
-}
-
-static int msm_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp,
- uint64_t timeout)
-{
- struct fd_device *dev = pipe->dev;
- struct drm_msm_wait_fence req = {
- .fence = timestamp,
- .queueid = to_msm_pipe(pipe)->queue_id,
- };
- int ret;
-
- get_abs_timeout(&req.timeout, timeout);
-
- ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req));
- if (ret) {
- ERROR_MSG("wait-fence failed! %d (%s)", ret, strerror(errno));
- return ret;
- }
-
- return 0;
-}
-
-static int open_submitqueue(struct fd_pipe *pipe, uint32_t prio)
-{
- struct drm_msm_submitqueue req = {
- .flags = 0,
- .prio = prio,
- };
- uint64_t nr_rings = 1;
- int ret;
-
- if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES) {
- to_msm_pipe(pipe)->queue_id = 0;
- return 0;
- }
-
- msm_pipe_get_param(pipe, FD_NR_RINGS, &nr_rings);
-
- req.prio = MIN2(req.prio, MAX2(nr_rings, 1) - 1);
-
- ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_NEW,
- &req, sizeof(req));
- if (ret) {
- ERROR_MSG("could not create submitqueue! %d (%s)", ret, strerror(errno));
- return ret;
- }
-
- to_msm_pipe(pipe)->queue_id = req.id;
- return 0;
-}
-
-static void close_submitqueue(struct fd_pipe *pipe, uint32_t queue_id)
-{
- if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES)
- return;
-
- drmCommandWrite(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE,
- &queue_id, sizeof(queue_id));
-}
-
-static void msm_pipe_destroy(struct fd_pipe *pipe)
-{
- struct msm_pipe *msm_pipe = to_msm_pipe(pipe);
- close_submitqueue(pipe, msm_pipe->queue_id);
- free(msm_pipe);
-}
-
-static const struct fd_pipe_funcs sp_funcs = {
- .ringbuffer_new_object = msm_ringbuffer_sp_new_object,
- .submit_new = msm_submit_sp_new,
- .get_param = msm_pipe_get_param,
- .wait = msm_pipe_wait,
- .destroy = msm_pipe_destroy,
-};
-
-static const struct fd_pipe_funcs legacy_funcs = {
- .ringbuffer_new_object = msm_ringbuffer_new_object,
- .submit_new = msm_submit_new,
- .get_param = msm_pipe_get_param,
- .wait = msm_pipe_wait,
- .destroy = msm_pipe_destroy,
-};
-
-static uint64_t get_param(struct fd_pipe *pipe, uint32_t param)
-{
- uint64_t value;
- int ret = query_param(pipe, param, &value);
- if (ret) {
- ERROR_MSG("get-param failed! %d (%s)", ret, strerror(errno));
- return 0;
- }
- return value;
-}
-
-struct fd_pipe * msm_pipe_new(struct fd_device *dev,
- enum fd_pipe_id id, uint32_t prio)
-{
- static const uint32_t pipe_id[] = {
- [FD_PIPE_3D] = MSM_PIPE_3D0,
- [FD_PIPE_2D] = MSM_PIPE_2D0,
- };
- struct msm_pipe *msm_pipe = NULL;
- struct fd_pipe *pipe = NULL;
-
- msm_pipe = calloc(1, sizeof(*msm_pipe));
- if (!msm_pipe) {
- ERROR_MSG("allocation failed");
- goto fail;
- }
-
- pipe = &msm_pipe->base;
-
- if (fd_device_version(dev) >= FD_VERSION_SOFTPIN) {
- pipe->funcs = &sp_funcs;
- } else {
- pipe->funcs = &legacy_funcs;
- }
-
- /* initialize before get_param(): */
- pipe->dev = dev;
- msm_pipe->pipe = pipe_id[id];
-
- /* these params should be supported since the first version of drm/msm: */
- msm_pipe->gpu_id = get_param(pipe, MSM_PARAM_GPU_ID);
- msm_pipe->gmem = get_param(pipe, MSM_PARAM_GMEM_SIZE);
- msm_pipe->chip_id = get_param(pipe, MSM_PARAM_CHIP_ID);
-
- if (! msm_pipe->gpu_id)
- goto fail;
-
- INFO_MSG("Pipe Info:");
- INFO_MSG(" GPU-id: %d", msm_pipe->gpu_id);
- INFO_MSG(" Chip-id: 0x%08x", msm_pipe->chip_id);
- INFO_MSG(" GMEM size: 0x%08x", msm_pipe->gmem);
-
- if (open_submitqueue(pipe, prio))
- goto fail;
-
- return pipe;
-fail:
- if (pipe)
- fd_pipe_del(pipe);
- return NULL;
-}
diff --git a/lib/mesa/src/freedreno/drm/msm_priv.h b/lib/mesa/src/freedreno/drm/msm_priv.h
deleted file mode 100644
index 9cb60bc1d..000000000
--- a/lib/mesa/src/freedreno/drm/msm_priv.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors:
- * Rob Clark <robclark@freedesktop.org>
- */
-
-#ifndef MSM_PRIV_H_
-#define MSM_PRIV_H_
-
-#include "freedreno_priv.h"
-
-#ifndef __user
-# define __user
-#endif
-
-#include "msm_drm.h"
-
-struct msm_device {
- struct fd_device base;
- struct fd_bo_cache ring_cache;
-};
-FD_DEFINE_CAST(fd_device, msm_device);
-
-struct fd_device * msm_device_new(int fd);
-
-struct msm_pipe {
- struct fd_pipe base;
- uint32_t pipe;
- uint32_t gpu_id;
- uint32_t gmem;
- uint32_t chip_id;
- uint32_t queue_id;
-};
-FD_DEFINE_CAST(fd_pipe, msm_pipe);
-
-struct fd_pipe * msm_pipe_new(struct fd_device *dev,
- enum fd_pipe_id id, uint32_t prio);
-
-struct fd_ringbuffer * msm_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size);
-struct fd_ringbuffer * msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size);
-
-struct fd_submit * msm_submit_new(struct fd_pipe *pipe);
-struct fd_submit * msm_submit_sp_new(struct fd_pipe *pipe);
-
-struct msm_bo {
- struct fd_bo base;
- uint64_t offset;
- /* to avoid excess hashtable lookups, cache the ring this bo was
- * last emitted on (since that will probably also be the next ring
- * it is emitted on)
- */
- unsigned current_submit_seqno;
- uint32_t idx;
-};
-FD_DEFINE_CAST(fd_bo, msm_bo);
-
-int msm_bo_new_handle(struct fd_device *dev,
- uint32_t size, uint32_t flags, uint32_t *handle);
-struct fd_bo * msm_bo_from_handle(struct fd_device *dev,
- uint32_t size, uint32_t handle);
-
-static inline void
-msm_dump_submit(struct drm_msm_gem_submit *req)
-{
- for (unsigned i = 0; i < req->nr_bos; i++) {
- struct drm_msm_gem_submit_bo *bos = U642VOID(req->bos);
- struct drm_msm_gem_submit_bo *bo = &bos[i];
- ERROR_MSG(" bos[%d]: handle=%u, flags=%x", i, bo->handle, bo->flags);
- }
- for (unsigned i = 0; i < req->nr_cmds; i++) {
- struct drm_msm_gem_submit_cmd *cmds = U642VOID(req->cmds);
- struct drm_msm_gem_submit_cmd *cmd = &cmds[i];
- struct drm_msm_gem_submit_reloc *relocs = U642VOID(cmd->relocs);
- ERROR_MSG(" cmd[%d]: type=%u, submit_idx=%u, submit_offset=%u, size=%u",
- i, cmd->type, cmd->submit_idx, cmd->submit_offset, cmd->size);
- for (unsigned j = 0; j < cmd->nr_relocs; j++) {
- struct drm_msm_gem_submit_reloc *r = &relocs[j];
- ERROR_MSG(" reloc[%d]: submit_offset=%u, or=%08x, shift=%d, reloc_idx=%u"
- ", reloc_offset=%"PRIu64, j, r->submit_offset, r->or, r->shift,
- r->reloc_idx, r->reloc_offset);
- }
- }
-}
-
-static inline void get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
-{
- struct timespec t;
- uint32_t s = ns / 1000000000;
- clock_gettime(CLOCK_MONOTONIC, &t);
- tv->tv_sec = t.tv_sec + s;
- tv->tv_nsec = t.tv_nsec + ns - (s * 1000000000);
-}
-
-/*
- * Stupid/simple growable array implementation:
- */
-
-static inline void *
-grow(void *ptr, uint16_t nr, uint16_t *max, uint16_t sz)
-{
- if ((nr + 1) > *max) {
- if ((*max * 2) < (nr + 1))
- *max = nr + 5;
- else
- *max = *max * 2;
- ptr = realloc(ptr, *max * sz);
- }
- return ptr;
-}
-
-#define DECLARE_ARRAY(type, name) \
- unsigned short nr_ ## name, max_ ## name; \
- type * name;
-
-#define APPEND(x, name) ({ \
- (x)->name = grow((x)->name, (x)->nr_ ## name, &(x)->max_ ## name, sizeof((x)->name[0])); \
- (x)->nr_ ## name ++; \
-})
-
-#endif /* MSM_PRIV_H_ */
diff --git a/lib/mesa/src/freedreno/drm/msm_ringbuffer.c b/lib/mesa/src/freedreno/drm/msm_ringbuffer.c
deleted file mode 100644
index 369f26f98..000000000
--- a/lib/mesa/src/freedreno/drm/msm_ringbuffer.c
+++ /dev/null
@@ -1,722 +0,0 @@
-/*
- * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors:
- * Rob Clark <robclark@freedesktop.org>
- */
-
-#include <assert.h>
-#include <inttypes.h>
-
-#include "util/hash_table.h"
-#include "util/set.h"
-#include "util/slab.h"
-
-#include "drm/freedreno_ringbuffer.h"
-#include "msm_priv.h"
-
-/* The legacy implementation of submit/ringbuffer, which still does the
- * traditional reloc and cmd tracking
- */
-
-
-#define INIT_SIZE 0x1000
-
-static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;
-
-
-struct msm_submit {
- struct fd_submit base;
-
- DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos);
- DECLARE_ARRAY(struct fd_bo *, bos);
-
- unsigned seqno;
-
- /* maps fd_bo to idx in bos table: */
- struct hash_table *bo_table;
-
- struct slab_mempool ring_pool;
-
- /* hash-set of associated rings: */
- struct set *ring_set;
-
- struct fd_ringbuffer *primary;
-
- /* Allow for sub-allocation of stateobj ring buffers (ie. sharing
- * the same underlying bo)..
- *
- * We also rely on previous stateobj having been fully constructed
- * so we can reclaim extra space at it's end.
- */
- struct fd_ringbuffer *suballoc_ring;
-};
-FD_DEFINE_CAST(fd_submit, msm_submit);
-
-/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
- * and sizes. Ie. a finalized buffer can have no more commands appended to
- * it.
- */
-struct msm_cmd {
- struct fd_bo *ring_bo;
- unsigned size;
- DECLARE_ARRAY(struct drm_msm_gem_submit_reloc, relocs);
-};
-
-static struct msm_cmd *
-cmd_new(struct fd_bo *ring_bo)
-{
- struct msm_cmd *cmd = malloc(sizeof(*cmd));
- cmd->ring_bo = fd_bo_ref(ring_bo);
- cmd->size = 0;
- cmd->nr_relocs = cmd->max_relocs = 0;
- cmd->relocs = NULL;
- return cmd;
-}
-
-static void
-cmd_free(struct msm_cmd *cmd)
-{
- fd_bo_del(cmd->ring_bo);
- free(cmd->relocs);
- free(cmd);
-}
-
-/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to
- * later copy into the submit when the stateobj rb is later referenced by
- * a regular rb:
- */
-struct msm_reloc_bo {
- struct fd_bo *bo;
- unsigned flags;
-};
-
-struct msm_ringbuffer {
- struct fd_ringbuffer base;
-
- /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
- unsigned offset;
-
- union {
- /* for _FD_RINGBUFFER_OBJECT case: */
- struct {
- struct fd_pipe *pipe;
- DECLARE_ARRAY(struct msm_reloc_bo, reloc_bos);
- struct set *ring_set;
- };
- /* for other cases: */
- struct {
- struct fd_submit *submit;
- DECLARE_ARRAY(struct msm_cmd *, cmds);
- };
- } u;
-
- struct msm_cmd *cmd; /* current cmd */
- struct fd_bo *ring_bo;
-};
-FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer);
-
-static void finalize_current_cmd(struct fd_ringbuffer *ring);
-static struct fd_ringbuffer * msm_ringbuffer_init(
- struct msm_ringbuffer *msm_ring,
- uint32_t size, enum fd_ringbuffer_flags flags);
-
-/* add (if needed) bo to submit and return index: */
-static uint32_t
-append_bo(struct msm_submit *submit, struct fd_bo *bo, uint32_t flags)
-{
- struct msm_bo *msm_bo = to_msm_bo(bo);
- uint32_t idx;
- pthread_mutex_lock(&idx_lock);
- if (likely(msm_bo->current_submit_seqno == submit->seqno)) {
- idx = msm_bo->idx;
- } else {
- uint32_t hash = _mesa_hash_pointer(bo);
- struct hash_entry *entry;
-
- entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
- if (entry) {
- /* found */
- idx = (uint32_t)(uintptr_t)entry->data;
- } else {
- idx = APPEND(submit, submit_bos);
- idx = APPEND(submit, bos);
-
- submit->submit_bos[idx].flags = 0;
- submit->submit_bos[idx].handle = bo->handle;
- submit->submit_bos[idx].presumed = 0;
-
- submit->bos[idx] = fd_bo_ref(bo);
-
- _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
- (void *)(uintptr_t)idx);
- }
- msm_bo->current_submit_seqno = submit->seqno;
- msm_bo->idx = idx;
- }
- pthread_mutex_unlock(&idx_lock);
- if (flags & FD_RELOC_READ)
- submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ;
- if (flags & FD_RELOC_WRITE)
- submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE;
- return idx;
-}
-
-static void
-append_ring(struct set *set, struct fd_ringbuffer *ring)
-{
- uint32_t hash = _mesa_hash_pointer(ring);
-
- if (!_mesa_set_search_pre_hashed(set, hash, ring)) {
- fd_ringbuffer_ref(ring);
- _mesa_set_add_pre_hashed(set, hash, ring);
- }
-}
-
-static void
-msm_submit_suballoc_ring_bo(struct fd_submit *submit,
- struct msm_ringbuffer *msm_ring, uint32_t size)
-{
- struct msm_submit *msm_submit = to_msm_submit(submit);
- unsigned suballoc_offset = 0;
- struct fd_bo *suballoc_bo = NULL;
-
- if (msm_submit->suballoc_ring) {
- struct msm_ringbuffer *suballoc_ring =
- to_msm_ringbuffer(msm_submit->suballoc_ring);
-
- suballoc_bo = suballoc_ring->ring_bo;
- suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) +
- suballoc_ring->offset;
-
- suballoc_offset = align(suballoc_offset, 0x10);
-
- if ((size + suballoc_offset) > suballoc_bo->size) {
- suballoc_bo = NULL;
- }
- }
-
- if (!suballoc_bo) {
- // TODO possibly larger size for streaming bo?
- msm_ring->ring_bo = fd_bo_new_ring(
- submit->pipe->dev, 0x8000, 0);
- msm_ring->offset = 0;
- } else {
- msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
- msm_ring->offset = suballoc_offset;
- }
-
- struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring;
-
- msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base);
-
- if (old_suballoc_ring)
- fd_ringbuffer_del(old_suballoc_ring);
-}
-
-static struct fd_ringbuffer *
-msm_submit_new_ringbuffer(struct fd_submit *submit, uint32_t size,
- enum fd_ringbuffer_flags flags)
-{
- struct msm_submit *msm_submit = to_msm_submit(submit);
- struct msm_ringbuffer *msm_ring;
-
- msm_ring = slab_alloc_st(&msm_submit->ring_pool);
-
- msm_ring->u.submit = submit;
-
- /* NOTE: needs to be before _suballoc_ring_bo() since it could
- * increment the refcnt of the current ring
- */
- msm_ring->base.refcnt = 1;
-
- if (flags & FD_RINGBUFFER_STREAMING) {
- msm_submit_suballoc_ring_bo(submit, msm_ring, size);
- } else {
- if (flags & FD_RINGBUFFER_GROWABLE)
- size = INIT_SIZE;
-
- msm_ring->offset = 0;
- msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, 0);
- }
-
- if (!msm_ringbuffer_init(msm_ring, size, flags))
- return NULL;
-
- if (flags & FD_RINGBUFFER_PRIMARY) {
- debug_assert(!msm_submit->primary);
- msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base);
- }
-
- return &msm_ring->base;
-}
-
-static struct drm_msm_gem_submit_reloc *
-handle_stateobj_relocs(struct msm_submit *submit, struct msm_ringbuffer *ring)
-{
- struct msm_cmd *cmd = ring->cmd;
- struct drm_msm_gem_submit_reloc *relocs;
-
- relocs = malloc(cmd->nr_relocs * sizeof(*relocs));
-
- for (unsigned i = 0; i < cmd->nr_relocs; i++) {
- unsigned idx = cmd->relocs[i].reloc_idx;
- struct fd_bo *bo = ring->u.reloc_bos[idx].bo;
- unsigned flags = 0;
-
- if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_READ)
- flags |= FD_RELOC_READ;
- if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_WRITE)
- flags |= FD_RELOC_WRITE;
-
- relocs[i] = cmd->relocs[i];
- relocs[i].reloc_idx = append_bo(submit, bo, flags);
- }
-
- return relocs;
-}
-
-static int
-msm_submit_flush(struct fd_submit *submit, int in_fence_fd,
- int *out_fence_fd, uint32_t *out_fence)
-{
- struct msm_submit *msm_submit = to_msm_submit(submit);
- struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe);
- struct drm_msm_gem_submit req = {
- .flags = msm_pipe->pipe,
- .queueid = msm_pipe->queue_id,
- };
- int ret;
-
- debug_assert(msm_submit->primary);
-
- finalize_current_cmd(msm_submit->primary);
- append_ring(msm_submit->ring_set, msm_submit->primary);
-
- unsigned nr_cmds = 0;
- unsigned nr_objs = 0;
-
- set_foreach(msm_submit->ring_set, entry) {
- struct fd_ringbuffer *ring = (void *)entry->key;
- if (ring->flags & _FD_RINGBUFFER_OBJECT) {
- nr_cmds += 1;
- nr_objs += 1;
- } else {
- if (ring != msm_submit->primary)
- finalize_current_cmd(ring);
- nr_cmds += to_msm_ringbuffer(ring)->u.nr_cmds;
- }
- }
-
- void *obj_relocs[nr_objs];
- struct drm_msm_gem_submit_cmd cmds[nr_cmds];
- unsigned i = 0, o = 0;
-
- set_foreach(msm_submit->ring_set, entry) {
- struct fd_ringbuffer *ring = (void *)entry->key;
- struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
-
- debug_assert(i < nr_cmds);
-
- // TODO handle relocs:
- if (ring->flags & _FD_RINGBUFFER_OBJECT) {
-
- debug_assert(o < nr_objs);
-
- void *relocs = handle_stateobj_relocs(msm_submit, msm_ring);
- obj_relocs[o++] = relocs;
-
- cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF;
- cmds[i].submit_idx =
- append_bo(msm_submit, msm_ring->ring_bo, FD_RELOC_READ);
- cmds[i].submit_offset = msm_ring->offset;
- cmds[i].size = offset_bytes(ring->cur, ring->start);
- cmds[i].pad = 0;
- cmds[i].nr_relocs = msm_ring->cmd->nr_relocs;
- cmds[i].relocs = VOID2U64(relocs);
-
- i++;
- } else {
- for (unsigned j = 0; j < msm_ring->u.nr_cmds; j++) {
- if (ring->flags & FD_RINGBUFFER_PRIMARY) {
- cmds[i].type = MSM_SUBMIT_CMD_BUF;
- } else {
- cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF;
- }
- cmds[i].submit_idx = append_bo(msm_submit,
- msm_ring->u.cmds[j]->ring_bo, FD_RELOC_READ);
- cmds[i].submit_offset = msm_ring->offset;
- cmds[i].size = msm_ring->u.cmds[j]->size;
- cmds[i].pad = 0;
- cmds[i].nr_relocs = msm_ring->u.cmds[j]->nr_relocs;
- cmds[i].relocs = VOID2U64(msm_ring->u.cmds[j]->relocs);
-
- i++;
- }
- }
- }
-
- if (in_fence_fd != -1) {
- req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT;
- req.fence_fd = in_fence_fd;
- }
-
- if (out_fence_fd) {
- req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
- }
-
- /* needs to be after get_cmd() as that could create bos/cmds table: */
- req.bos = VOID2U64(msm_submit->submit_bos),
- req.nr_bos = msm_submit->nr_submit_bos;
- req.cmds = VOID2U64(cmds),
- req.nr_cmds = nr_cmds;
-
- DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos);
-
- ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT,
- &req, sizeof(req));
- if (ret) {
- ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno));
- msm_dump_submit(&req);
- } else if (!ret) {
- if (out_fence)
- *out_fence = req.fence;
-
- if (out_fence_fd)
- *out_fence_fd = req.fence_fd;
- }
-
- for (unsigned o = 0; o < nr_objs; o++)
- free(obj_relocs[o]);
-
- return ret;
-}
-
-static void
-unref_rings(struct set_entry *entry)
-{
- struct fd_ringbuffer *ring = (void *)entry->key;
- fd_ringbuffer_del(ring);
-}
-
-static void
-msm_submit_destroy(struct fd_submit *submit)
-{
- struct msm_submit *msm_submit = to_msm_submit(submit);
-
- if (msm_submit->primary)
- fd_ringbuffer_del(msm_submit->primary);
- if (msm_submit->suballoc_ring)
- fd_ringbuffer_del(msm_submit->suballoc_ring);
-
- _mesa_hash_table_destroy(msm_submit->bo_table, NULL);
- _mesa_set_destroy(msm_submit->ring_set, unref_rings);
-
- // TODO it would be nice to have a way to debug_assert() if all
- // rb's haven't been free'd back to the slab, because that is
- // an indication that we are leaking bo's
- slab_destroy(&msm_submit->ring_pool);
-
- for (unsigned i = 0; i < msm_submit->nr_bos; i++)
- fd_bo_del(msm_submit->bos[i]);
-
- free(msm_submit->submit_bos);
- free(msm_submit->bos);
- free(msm_submit);
-}
-
-static const struct fd_submit_funcs submit_funcs = {
- .new_ringbuffer = msm_submit_new_ringbuffer,
- .flush = msm_submit_flush,
- .destroy = msm_submit_destroy,
-};
-
-struct fd_submit *
-msm_submit_new(struct fd_pipe *pipe)
-{
- struct msm_submit *msm_submit = calloc(1, sizeof(*msm_submit));
- struct fd_submit *submit;
- static unsigned submit_cnt = 0;
-
- msm_submit->seqno = ++submit_cnt;
- msm_submit->bo_table = _mesa_hash_table_create(NULL,
- _mesa_hash_pointer, _mesa_key_pointer_equal);
- msm_submit->ring_set = _mesa_set_create(NULL,
- _mesa_hash_pointer, _mesa_key_pointer_equal);
- // TODO tune size:
- slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer), 16);
-
- submit = &msm_submit->base;
- submit->pipe = pipe;
- submit->funcs = &submit_funcs;
-
- return submit;
-}
-
-
-static void
-finalize_current_cmd(struct fd_ringbuffer *ring)
-{
- struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
-
- debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
-
- if (!msm_ring->cmd)
- return;
-
- debug_assert(msm_ring->cmd->ring_bo == msm_ring->ring_bo);
-
- unsigned idx = APPEND(&msm_ring->u, cmds);
-
- msm_ring->u.cmds[idx] = msm_ring->cmd;
- msm_ring->cmd = NULL;
-
- msm_ring->u.cmds[idx]->size = offset_bytes(ring->cur, ring->start);
-}
-
-static void
-msm_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t size)
-{
- struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
- struct fd_pipe *pipe = msm_ring->u.submit->pipe;
-
- debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);
-
- finalize_current_cmd(ring);
-
- fd_bo_del(msm_ring->ring_bo);
- msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0);
- msm_ring->cmd = cmd_new(msm_ring->ring_bo);
-
- ring->start = fd_bo_map(msm_ring->ring_bo);
- ring->end = &(ring->start[size/4]);
- ring->cur = ring->start;
- ring->size = size;
-}
-
-static void
-msm_ringbuffer_emit_reloc(struct fd_ringbuffer *ring,
- const struct fd_reloc *reloc)
-{
- struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
- struct fd_pipe *pipe;
- unsigned reloc_idx;
-
- if (ring->flags & _FD_RINGBUFFER_OBJECT) {
- unsigned idx = APPEND(&msm_ring->u, reloc_bos);
-
- msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo);
- msm_ring->u.reloc_bos[idx].flags = reloc->flags;
-
- /* this gets fixed up at submit->flush() time, since this state-
- * object rb can be used with many different submits
- */
- reloc_idx = idx;
-
- pipe = msm_ring->u.pipe;
- } else {
- struct msm_submit *msm_submit =
- to_msm_submit(msm_ring->u.submit);
-
- reloc_idx = append_bo(msm_submit, reloc->bo, reloc->flags);
-
- pipe = msm_ring->u.submit->pipe;
- }
-
- struct drm_msm_gem_submit_reloc *r;
- unsigned idx = APPEND(msm_ring->cmd, relocs);
-
- r = &msm_ring->cmd->relocs[idx];
-
- r->reloc_idx = reloc_idx;
- r->reloc_offset = reloc->offset;
- r->or = reloc->or;
- r->shift = reloc->shift;
- r->submit_offset = offset_bytes(ring->cur, ring->start) +
- msm_ring->offset;
-
- ring->cur++;
-
- if (pipe->gpu_id >= 500) {
- idx = APPEND(msm_ring->cmd, relocs);
- r = &msm_ring->cmd->relocs[idx];
-
- r->reloc_idx = reloc_idx;
- r->reloc_offset = reloc->offset;
- r->or = reloc->orhi;
- r->shift = reloc->shift - 32;
- r->submit_offset = offset_bytes(ring->cur, ring->start) +
- msm_ring->offset;
-
- ring->cur++;
- }
-}
-
-static void
-append_stateobj_rings(struct msm_submit *submit, struct fd_ringbuffer *target)
-{
- struct msm_ringbuffer *msm_target = to_msm_ringbuffer(target);
-
- debug_assert(target->flags & _FD_RINGBUFFER_OBJECT);
-
- set_foreach(msm_target->u.ring_set, entry) {
- struct fd_ringbuffer *ring = (void *)entry->key;
-
- append_ring(submit->ring_set, ring);
-
- if (ring->flags & _FD_RINGBUFFER_OBJECT) {
- append_stateobj_rings(submit, ring);
- }
- }
-}
-
-static uint32_t
-msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring,
- struct fd_ringbuffer *target, uint32_t cmd_idx)
-{
- struct msm_ringbuffer *msm_target = to_msm_ringbuffer(target);
- struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
- struct fd_bo *bo;
- uint32_t size;
-
- if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
- (cmd_idx < msm_target->u.nr_cmds)) {
- bo = msm_target->u.cmds[cmd_idx]->ring_bo;
- size = msm_target->u.cmds[cmd_idx]->size;
- } else {
- bo = msm_target->ring_bo;
- size = offset_bytes(target->cur, target->start);
- }
-
- msm_ringbuffer_emit_reloc(ring, &(struct fd_reloc){
- .bo = bo,
- .flags = FD_RELOC_READ,
- .offset = msm_target->offset,
- });
-
- if ((target->flags & _FD_RINGBUFFER_OBJECT) &&
- !(ring->flags & _FD_RINGBUFFER_OBJECT)) {
- struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit);
-
- append_stateobj_rings(msm_submit, target);
- }
-
- if (ring->flags & _FD_RINGBUFFER_OBJECT) {
- append_ring(msm_ring->u.ring_set, target);
- } else {
- struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit);
- append_ring(msm_submit->ring_set, target);
- }
-
- return size;
-}
-
-static uint32_t
-msm_ringbuffer_cmd_count(struct fd_ringbuffer *ring)
-{
- if (ring->flags & FD_RINGBUFFER_GROWABLE)
- return to_msm_ringbuffer(ring)->u.nr_cmds + 1;
- return 1;
-}
-
-static void
-msm_ringbuffer_destroy(struct fd_ringbuffer *ring)
-{
- struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
-
- fd_bo_del(msm_ring->ring_bo);
- if (msm_ring->cmd)
- cmd_free(msm_ring->cmd);
-
- if (ring->flags & _FD_RINGBUFFER_OBJECT) {
- for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
- fd_bo_del(msm_ring->u.reloc_bos[i].bo);
- }
-
- _mesa_set_destroy(msm_ring->u.ring_set, unref_rings);
-
- free(msm_ring->u.reloc_bos);
- free(msm_ring);
- } else {
- struct fd_submit *submit = msm_ring->u.submit;
-
- for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
- cmd_free(msm_ring->u.cmds[i]);
- }
-
- free(msm_ring->u.cmds);
- slab_free_st(&to_msm_submit(submit)->ring_pool, msm_ring);
- }
-}
-
-static const struct fd_ringbuffer_funcs ring_funcs = {
- .grow = msm_ringbuffer_grow,
- .emit_reloc = msm_ringbuffer_emit_reloc,
- .emit_reloc_ring = msm_ringbuffer_emit_reloc_ring,
- .cmd_count = msm_ringbuffer_cmd_count,
- .destroy = msm_ringbuffer_destroy,
-};
-
-static inline struct fd_ringbuffer *
-msm_ringbuffer_init(struct msm_ringbuffer *msm_ring, uint32_t size,
- enum fd_ringbuffer_flags flags)
-{
- struct fd_ringbuffer *ring = &msm_ring->base;
-
- debug_assert(msm_ring->ring_bo);
-
- uint8_t *base = fd_bo_map(msm_ring->ring_bo);
- ring->start = (void *)(base + msm_ring->offset);
- ring->end = &(ring->start[size/4]);
- ring->cur = ring->start;
-
- ring->size = size;
- ring->flags = flags;
-
- ring->funcs = &ring_funcs;
-
- msm_ring->u.cmds = NULL;
- msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;
-
- msm_ring->cmd = cmd_new(msm_ring->ring_bo);
-
- return ring;
-}
-
-struct fd_ringbuffer *
-msm_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size)
-{
- struct msm_ringbuffer *msm_ring = malloc(sizeof(*msm_ring));
-
- msm_ring->u.pipe = pipe;
- msm_ring->offset = 0;
- msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0);
- msm_ring->base.refcnt = 1;
-
- msm_ring->u.reloc_bos = NULL;
- msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;
-
- msm_ring->u.ring_set = _mesa_set_create(NULL,
- _mesa_hash_pointer, _mesa_key_pointer_equal);
-
- return msm_ringbuffer_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
-}
diff --git a/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.c b/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.c
deleted file mode 100644
index 2b8f53172..000000000
--- a/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.c
+++ /dev/null
@@ -1,568 +0,0 @@
-/*
- * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors:
- * Rob Clark <robclark@freedesktop.org>
- */
-
-#include <assert.h>
-#include <inttypes.h>
-
-#include "util/hash_table.h"
-#include "util/slab.h"
-
-#include "drm/freedreno_ringbuffer.h"
-#include "msm_priv.h"
-
-/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
- * by avoiding the additional tracking necessary to build cmds/relocs tables
- * (but still builds a bos table)
- */
-
-
-#define INIT_SIZE 0x1000
-
-static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;
-
-
-struct msm_submit_sp {
- struct fd_submit base;
-
- DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos);
- DECLARE_ARRAY(struct fd_bo *, bos);
-
- unsigned seqno;
-
- /* maps fd_bo to idx in bos table: */
- struct hash_table *bo_table;
-
- struct slab_mempool ring_pool;
-
- struct fd_ringbuffer *primary;
-
- /* Allow for sub-allocation of stateobj ring buffers (ie. sharing
- * the same underlying bo)..
- *
- * We also rely on previous stateobj having been fully constructed
- * so we can reclaim extra space at its end.
- */
- struct fd_ringbuffer *suballoc_ring;
-};
-FD_DEFINE_CAST(fd_submit, msm_submit_sp);
-
-/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
- * and sizes. Ie. a finalized buffer can have no more commands appended to
- * it.
- */
-struct msm_cmd_sp {
- struct fd_bo *ring_bo;
- unsigned size;
-};
-
-/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to
- * later copy into the submit when the stateobj rb is later referenced by
- * a regular rb:
- */
-struct msm_reloc_bo_sp {
- struct fd_bo *bo;
- unsigned flags;
-};
-
-struct msm_ringbuffer_sp {
- struct fd_ringbuffer base;
-
- /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
- unsigned offset;
-
-// TODO check disasm.. hopefully compilers CSE can realize that
-// reloc_bos and cmds are at the same offsets and optimize some
-// divergent cases into single case
- union {
- /* for _FD_RINGBUFFER_OBJECT case: */
- struct {
- struct fd_pipe *pipe;
- DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos);
- };
- /* for other cases: */
- struct {
- struct fd_submit *submit;
- DECLARE_ARRAY(struct msm_cmd_sp, cmds);
- };
- } u;
-
- struct fd_bo *ring_bo;
-};
-FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp);
-
-static void finalize_current_cmd(struct fd_ringbuffer *ring);
-static struct fd_ringbuffer * msm_ringbuffer_sp_init(
- struct msm_ringbuffer_sp *msm_ring,
- uint32_t size, enum fd_ringbuffer_flags flags);
-
-/* add (if needed) bo to submit and return index: */
-static uint32_t
-append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags)
-{
- struct msm_bo *msm_bo = to_msm_bo(bo);
- uint32_t idx;
- pthread_mutex_lock(&idx_lock);
- if (likely(msm_bo->current_submit_seqno == submit->seqno)) {
- idx = msm_bo->idx;
- } else {
- uint32_t hash = _mesa_hash_pointer(bo);
- struct hash_entry *entry;
-
- entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
- if (entry) {
- /* found */
- idx = (uint32_t)(uintptr_t)entry->data;
- } else {
- idx = APPEND(submit, submit_bos);
- idx = APPEND(submit, bos);
-
- submit->submit_bos[idx].flags = 0;
- submit->submit_bos[idx].handle = bo->handle;
- submit->submit_bos[idx].presumed = 0;
-
- submit->bos[idx] = fd_bo_ref(bo);
-
- _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
- (void *)(uintptr_t)idx);
- }
- msm_bo->current_submit_seqno = submit->seqno;
- msm_bo->idx = idx;
- }
- pthread_mutex_unlock(&idx_lock);
- if (flags & FD_RELOC_READ)
- submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ;
- if (flags & FD_RELOC_WRITE)
- submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE;
- if (flags & FD_RELOC_DUMP)
- submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_DUMP;
- return idx;
-}
-
-static void
-msm_submit_suballoc_ring_bo(struct fd_submit *submit,
- struct msm_ringbuffer_sp *msm_ring, uint32_t size)
-{
- struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
- unsigned suballoc_offset = 0;
- struct fd_bo *suballoc_bo = NULL;
-
- if (msm_submit->suballoc_ring) {
- struct msm_ringbuffer_sp *suballoc_ring =
- to_msm_ringbuffer_sp(msm_submit->suballoc_ring);
-
- suballoc_bo = suballoc_ring->ring_bo;
- suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) +
- suballoc_ring->offset;
-
- suballoc_offset = align(suballoc_offset, 0x10);
-
- if ((size + suballoc_offset) > suballoc_bo->size) {
- suballoc_bo = NULL;
- }
- }
-
- if (!suballoc_bo) {
- // TODO possibly larger size for streaming bo?
- msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev,
- 0x8000, DRM_FREEDRENO_GEM_GPUREADONLY);
- msm_ring->offset = 0;
- } else {
- msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
- msm_ring->offset = suballoc_offset;
- }
-
- struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring;
-
- msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base);
-
- if (old_suballoc_ring)
- fd_ringbuffer_del(old_suballoc_ring);
-}
-
-static struct fd_ringbuffer *
-msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
- enum fd_ringbuffer_flags flags)
-{
- struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
- struct msm_ringbuffer_sp *msm_ring;
-
- msm_ring = slab_alloc_st(&msm_submit->ring_pool);
-
- msm_ring->u.submit = submit;
-
- /* NOTE: needs to be before _suballoc_ring_bo() since it could
- * increment the refcnt of the current ring
- */
- msm_ring->base.refcnt = 1;
-
- if (flags & FD_RINGBUFFER_STREAMING) {
- msm_submit_suballoc_ring_bo(submit, msm_ring, size);
- } else {
- if (flags & FD_RINGBUFFER_GROWABLE)
- size = INIT_SIZE;
-
- msm_ring->offset = 0;
- msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size,
- DRM_FREEDRENO_GEM_GPUREADONLY);
- }
-
- if (!msm_ringbuffer_sp_init(msm_ring, size, flags))
- return NULL;
-
- if (flags & FD_RINGBUFFER_PRIMARY) {
- debug_assert(!msm_submit->primary);
- msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base);
- }
-
- return &msm_ring->base;
-}
-
-static int
-msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
- int *out_fence_fd, uint32_t *out_fence)
-{
- struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
- struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe);
- struct drm_msm_gem_submit req = {
- .flags = msm_pipe->pipe,
- .queueid = msm_pipe->queue_id,
- };
- int ret;
-
- debug_assert(msm_submit->primary);
- finalize_current_cmd(msm_submit->primary);
-
- struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary);
- struct drm_msm_gem_submit_cmd cmds[primary->u.nr_cmds];
-
- for (unsigned i = 0; i < primary->u.nr_cmds; i++) {
- cmds[i].type = MSM_SUBMIT_CMD_BUF;
- cmds[i].submit_idx = append_bo(msm_submit,
- primary->u.cmds[i].ring_bo, FD_RELOC_READ | FD_RELOC_DUMP);
- cmds[i].submit_offset = primary->offset;
- cmds[i].size = primary->u.cmds[i].size;
- cmds[i].pad = 0;
- cmds[i].nr_relocs = 0;
- }
-
- if (in_fence_fd != -1) {
- req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT;
- req.fence_fd = in_fence_fd;
- }
-
- if (out_fence_fd) {
- req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
- }
-
- /* needs to be after get_cmd() as that could create bos/cmds table: */
- req.bos = VOID2U64(msm_submit->submit_bos),
- req.nr_bos = msm_submit->nr_submit_bos;
- req.cmds = VOID2U64(cmds),
- req.nr_cmds = primary->u.nr_cmds;
-
- DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos);
-
- ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT,
- &req, sizeof(req));
- if (ret) {
- ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno));
- msm_dump_submit(&req);
- } else if (!ret) {
- if (out_fence)
- *out_fence = req.fence;
-
- if (out_fence_fd)
- *out_fence_fd = req.fence_fd;
- }
-
- return ret;
-}
-
-static void
-msm_submit_sp_destroy(struct fd_submit *submit)
-{
- struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
-
- if (msm_submit->primary)
- fd_ringbuffer_del(msm_submit->primary);
- if (msm_submit->suballoc_ring)
- fd_ringbuffer_del(msm_submit->suballoc_ring);
-
- _mesa_hash_table_destroy(msm_submit->bo_table, NULL);
-
- // TODO it would be nice to have a way to debug_assert() if all
- // rb's haven't been free'd back to the slab, because that is
- // an indication that we are leaking bo's
- slab_destroy(&msm_submit->ring_pool);
-
- for (unsigned i = 0; i < msm_submit->nr_bos; i++)
- fd_bo_del(msm_submit->bos[i]);
-
- free(msm_submit->submit_bos);
- free(msm_submit->bos);
- free(msm_submit);
-}
-
-static const struct fd_submit_funcs submit_funcs = {
- .new_ringbuffer = msm_submit_sp_new_ringbuffer,
- .flush = msm_submit_sp_flush,
- .destroy = msm_submit_sp_destroy,
-};
-
-struct fd_submit *
-msm_submit_sp_new(struct fd_pipe *pipe)
-{
- struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit));
- struct fd_submit *submit;
- static unsigned submit_cnt = 0;
-
- msm_submit->seqno = ++submit_cnt;
- msm_submit->bo_table = _mesa_hash_table_create(NULL,
- _mesa_hash_pointer, _mesa_key_pointer_equal);
- // TODO tune size:
- slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer_sp), 16);
-
- submit = &msm_submit->base;
- submit->pipe = pipe;
- submit->funcs = &submit_funcs;
-
- return submit;
-}
-
-
-static void
-finalize_current_cmd(struct fd_ringbuffer *ring)
-{
- debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
-
- struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
- unsigned idx = APPEND(&msm_ring->u, cmds);
-
- msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo);
- msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start);
-}
-
-static void
-msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
-{
- struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
- struct fd_pipe *pipe = msm_ring->u.submit->pipe;
-
- debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);
-
- finalize_current_cmd(ring);
-
- fd_bo_del(msm_ring->ring_bo);
- msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size,
- DRM_FREEDRENO_GEM_GPUREADONLY);
-
- ring->start = fd_bo_map(msm_ring->ring_bo);
- ring->end = &(ring->start[size/4]);
- ring->cur = ring->start;
- ring->size = size;
-}
-
-static void
-msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring,
- const struct fd_reloc *reloc)
-{
- struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
- struct fd_pipe *pipe;
-
- if (ring->flags & _FD_RINGBUFFER_OBJECT) {
- unsigned idx = APPEND(&msm_ring->u, reloc_bos);
-
- msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo);
- msm_ring->u.reloc_bos[idx].flags = reloc->flags;
-
- pipe = msm_ring->u.pipe;
- } else {
- struct msm_submit_sp *msm_submit =
- to_msm_submit_sp(msm_ring->u.submit);
-
- append_bo(msm_submit, reloc->bo, reloc->flags);
-
- pipe = msm_ring->u.submit->pipe;
- }
-
- uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset;
- uint32_t dword = iova;
- int shift = reloc->shift;
-
- if (shift < 0)
- dword >>= -shift;
- else
- dword <<= shift;
-
- (*ring->cur++) = dword | reloc->or;
-
- if (pipe->gpu_id >= 500) {
- dword = iova >> 32;
- shift -= 32;
-
- if (shift < 0)
- dword >>= -shift;
- else
- dword <<= shift;
-
- (*ring->cur++) = dword | reloc->orhi;
- }
-}
-
-static uint32_t
-msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring,
- struct fd_ringbuffer *target, uint32_t cmd_idx)
-{
- struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
- struct fd_bo *bo;
- uint32_t size;
-
- if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
- (cmd_idx < msm_target->u.nr_cmds)) {
- bo = msm_target->u.cmds[cmd_idx].ring_bo;
- size = msm_target->u.cmds[cmd_idx].size;
- } else {
- bo = msm_target->ring_bo;
- size = offset_bytes(target->cur, target->start);
- }
-
- msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){
- .bo = bo,
- .flags = FD_RELOC_READ | FD_RELOC_DUMP,
- .offset = msm_target->offset,
- });
-
- if (!(target->flags & _FD_RINGBUFFER_OBJECT))
- return size;
-
- struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
-
- if (ring->flags & _FD_RINGBUFFER_OBJECT) {
- for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
- unsigned idx = APPEND(&msm_ring->u, reloc_bos);
-
- msm_ring->u.reloc_bos[idx].bo =
- fd_bo_ref(msm_target->u.reloc_bos[i].bo);
- msm_ring->u.reloc_bos[idx].flags =
- msm_target->u.reloc_bos[i].flags;
- }
- } else {
- // TODO it would be nice to know whether we have already
- // seen this target before. But hopefully we hit the
- // append_bo() fast path enough for this to not matter:
- struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);
-
- for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
- append_bo(msm_submit, msm_target->u.reloc_bos[i].bo,
- msm_target->u.reloc_bos[i].flags);
- }
- }
-
- return size;
-}
-
-static uint32_t
-msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
-{
- if (ring->flags & FD_RINGBUFFER_GROWABLE)
- return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1;
- return 1;
-}
-
-static void
-msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
-{
- struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
-
- fd_bo_del(msm_ring->ring_bo);
-
- if (ring->flags & _FD_RINGBUFFER_OBJECT) {
- for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
- fd_bo_del(msm_ring->u.reloc_bos[i].bo);
- }
-
- free(msm_ring);
- } else {
- struct fd_submit *submit = msm_ring->u.submit;
-
- for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
- fd_bo_del(msm_ring->u.cmds[i].ring_bo);
- }
-
- slab_free_st(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
- }
-}
-
-static const struct fd_ringbuffer_funcs ring_funcs = {
- .grow = msm_ringbuffer_sp_grow,
- .emit_reloc = msm_ringbuffer_sp_emit_reloc,
- .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring,
- .cmd_count = msm_ringbuffer_sp_cmd_count,
- .destroy = msm_ringbuffer_sp_destroy,
-};
-
-static inline struct fd_ringbuffer *
-msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
- enum fd_ringbuffer_flags flags)
-{
- struct fd_ringbuffer *ring = &msm_ring->base;
-
- debug_assert(msm_ring->ring_bo);
-
- uint8_t *base = fd_bo_map(msm_ring->ring_bo);
- ring->start = (void *)(base + msm_ring->offset);
- ring->end = &(ring->start[size/4]);
- ring->cur = ring->start;
-
- ring->size = size;
- ring->flags = flags;
-
- ring->funcs = &ring_funcs;
-
- // TODO initializing these could probably be conditional on flags
-// since unneeded for FD_RINGBUFFER_STAGING case..
- msm_ring->u.cmds = NULL;
- msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;
-
- msm_ring->u.reloc_bos = NULL;
- msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;
-
- return ring;
-}
-
-struct fd_ringbuffer *
-msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
-{
- struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring));
-
- msm_ring->u.pipe = pipe;
- msm_ring->offset = 0;
- msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size,
- DRM_FREEDRENO_GEM_GPUREADONLY);
- msm_ring->base.refcnt = 1;
-
- return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
-}
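
append_bo() in the file above is the hot path the softpin design trims: each bo remembers the seqno of the submit it was last added to, so repeated references skip the hash-table lookup. A standalone model of that idea, with hypothetical types and a plain array standing in for the Mesa pointer hash table (the real code also takes idx_lock around this path):

#include <stdint.h>
#include <stdio.h>

struct fake_bo {
   uint32_t current_submit_seqno; /* seqno of the submit this bo was last added to */
   uint32_t idx;                  /* its index in that submit's bo table */
};

struct fake_submit {
   uint32_t seqno;
   struct fake_bo *bos[64];
   unsigned nr_bos;
};

/* Add bo to the submit's table (if needed) and return its index. */
static uint32_t append_bo(struct fake_submit *s, struct fake_bo *bo)
{
   /* fast path: bo was already added to this very submit */
   if (bo->current_submit_seqno == s->seqno)
      return bo->idx;

   /* slow path: a linear scan stands in for the pointer hash table */
   uint32_t idx = s->nr_bos;
   for (uint32_t i = 0; i < s->nr_bos; i++) {
      if (s->bos[i] == bo) {
         idx = i;
         break;
      }
   }
   if (idx == s->nr_bos)
      s->bos[s->nr_bos++] = bo;

   bo->current_submit_seqno = s->seqno;
   bo->idx = idx;
   return idx;
}

int main(void)
{
   struct fake_bo a = {0}, b = {0};
   struct fake_submit s = { .seqno = 7 };

   uint32_t i0 = append_bo(&s, &a);
   uint32_t i1 = append_bo(&s, &b);
   uint32_t i2 = append_bo(&s, &a);   /* hits the seqno fast path */
   printf("%u %u %u (nr_bos=%u)\n", i0, i1, i2, s.nr_bos);
   return 0;
}
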
diff --git a/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.h b/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.h
deleted file mode 100644
index 79dba3b84..000000000
--- a/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright © 2021 Google, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifdef X
-# undef X
-#endif
-
-#if PTRSZ == 32
-# define X(n) n ## _32
-#else
-# define X(n) n ## _64
-#endif
-
-
-static void
-X(emit_reloc_common)(struct fd_ringbuffer *ring, const struct fd_reloc *reloc)
-{
- (*ring->cur++) = (uint32_t)reloc->iova;
-#if PTRSZ == 64
- (*ring->cur++) = (uint32_t)(reloc->iova >> 32);
-#endif
-}
-
-static void
-X(msm_ringbuffer_sp_emit_reloc_nonobj)(struct fd_ringbuffer *ring,
- const struct fd_reloc *reloc)
-{
- X(emit_reloc_common)(ring, reloc);
-
- assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
-
- struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
-
- struct msm_submit_sp *msm_submit =
- to_msm_submit_sp(msm_ring->u.submit);
-
- msm_submit_append_bo(msm_submit, reloc->bo);
-}
-
-static void
-X(msm_ringbuffer_sp_emit_reloc_obj)(struct fd_ringbuffer *ring,
- const struct fd_reloc *reloc)
-{
- X(emit_reloc_common)(ring, reloc);
-
- assert(ring->flags & _FD_RINGBUFFER_OBJECT);
-
- struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
-
- /* Avoid emitting duplicate BO references into the list. Ringbuffer
- * objects are long-lived, so this saves ongoing work at draw time in
- * exchange for a bit at context setup/first draw. And the number of
- * relocs per ringbuffer object is fairly small, so the O(n^2) doesn't
- * hurt much.
- */
- bool found = false;
- for (int i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
- if (msm_ring->u.reloc_bos[i] == reloc->bo) {
- found = true;
- break;
- }
- }
- if (!found) {
- APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(reloc->bo));
- }
-}
-
-static uint32_t
-X(msm_ringbuffer_sp_emit_reloc_ring)(struct fd_ringbuffer *ring,
- struct fd_ringbuffer *target, uint32_t cmd_idx)
-{
- struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
- struct fd_bo *bo;
- uint32_t size;
-
- if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
- (cmd_idx < msm_target->u.nr_cmds)) {
- bo = msm_target->u.cmds[cmd_idx].ring_bo;
- size = msm_target->u.cmds[cmd_idx].size;
- } else {
- bo = msm_target->ring_bo;
- size = offset_bytes(target->cur, target->start);
- }
-
- if (ring->flags & _FD_RINGBUFFER_OBJECT) {
- X(msm_ringbuffer_sp_emit_reloc_obj)(ring, &(struct fd_reloc){
- .bo = bo,
- .iova = bo->iova + msm_target->offset,
- .offset = msm_target->offset,
- });
- } else {
- X(msm_ringbuffer_sp_emit_reloc_nonobj)(ring, &(struct fd_reloc){
- .bo = bo,
- .iova = bo->iova + msm_target->offset,
- .offset = msm_target->offset,
- });
- }
-
- if (!(target->flags & _FD_RINGBUFFER_OBJECT))
- return size;
-
- struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
-
- if (ring->flags & _FD_RINGBUFFER_OBJECT) {
- for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
- APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(msm_target->u.reloc_bos[i]));
- }
- } else {
- // TODO it would be nice to know whether we have already
- // seen this target before. But hopefully we hit the
- // append_bo() fast path enough for this to not matter:
- struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);
-
- for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
- msm_submit_append_bo(msm_submit, msm_target->u.reloc_bos[i]);
- }
- }
-
- return size;
-}
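
msm_ringbuffer_sp.h above is a template header: it is included twice, with PTRSZ defined to 32 and then 64, so the X() macro stamps out _32/_64 variants of each emit helper. The sketch below shows the same double-include pattern with hypothetical file and function names; like the real header, the template half relies on its includer for <stdint.h>:

/* ---- emit_addr_impl.h: included twice, once per pointer size ---- */
#ifdef X
#  undef X
#endif

#if PTRSZ == 32
#  define X(n) n##_32
#else
#  define X(n) n##_64
#endif

static void X(emit_addr)(uint32_t **cur, uint64_t iova)
{
   *(*cur)++ = (uint32_t)iova;
#if PTRSZ == 64
   *(*cur)++ = (uint32_t)(iova >> 32);  /* 64-bit GPUs take a second dword */
#endif
}

/* ---- user.c: stamp out both variants ---- */
#include <stdint.h>
#include <stdio.h>

#define PTRSZ 32
#include "emit_addr_impl.h"
#undef PTRSZ
#define PTRSZ 64
#include "emit_addr_impl.h"

int main(void)
{
   uint32_t ring[4] = {0}, *cur = ring;
   emit_addr_32(&cur, 0x1000);
   emit_addr_64(&cur, 0x500001000ull);
   printf("%08x %08x %08x\n", ring[0], ring[1], ring[2]);
   return 0;
}
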
diff --git a/lib/mesa/src/freedreno/fdl/fd6_layout_test.c b/lib/mesa/src/freedreno/fdl/fd6_layout_test.c
index 91639843d..f4eda1135 100644
--- a/lib/mesa/src/freedreno/fdl/fd6_layout_test.c
+++ b/lib/mesa/src/freedreno/fdl/fd6_layout_test.c
@@ -683,6 +683,243 @@ static const struct testcase
},
},
},
+
+ /* Easy 32x32x32 3d case */
+ {
+ .format = PIPE_FORMAT_R9G9B9E5_FLOAT,
+ .is_3d = true,
+ .layout =
+ {
+ .tile_mode = TILE6_3,
+ .ubwc = false,
+ .width0 = 32,
+ .height0 = 32,
+ .depth0 = 32,
+ .slices =
+ {
+ {.offset = 0, .pitch = 256, .size0 = 8192},
+ {.offset = 262144, .pitch = 256, .size0 = 4096},
+ {.offset = 327680, .pitch = 256, .size0 = 4096},
+ {.offset = 360448, .pitch = 256, .size0 = 4096},
+ {.offset = 376832, .pitch = 256, .size0 = 4096},
+ {.offset = 385024, .pitch = 256},
+ },
+ },
+ },
+
+ /* Scale up a bit to 128x128x32 3d */
+ {
+ .format = PIPE_FORMAT_R9G9B9E5_FLOAT,
+ .is_3d = true,
+ .layout =
+ {
+ .tile_mode = TILE6_3,
+ .ubwc = false,
+ .width0 = 128,
+ .height0 = 128,
+ .depth0 = 32,
+ .slices =
+ {
+ {.offset = 0, .pitch = 512, .size0 = 65536},
+ {.offset = 2097152, .pitch = 256, .size0 = 16384},
+ {.offset = 2359296, .pitch = 256, .size0 = 8192},
+ {.offset = 2424832, .pitch = 256, .size0 = 8192},
+ {.offset = 2457600, .pitch = 256, .size0 = 8192},
+ {.offset = 2473984, .pitch = 256},
+ {.offset = 2482176, .pitch = 256},
+ {.offset = 2490368, .pitch = 256},
+ },
+ },
+ },
+
+ /* Changing width to 1 changes where minimum layer size happens. */
+ {
+ .format = PIPE_FORMAT_R9G9B9E5_FLOAT,
+ .is_3d = true,
+ .layout =
+ {
+ .tile_mode = TILE6_LINEAR,
+ .ubwc = false,
+ .width0 = 1,
+ .height0 = 128,
+ .depth0 = 32,
+ .slices =
+ {
+ {.offset = 0, .pitch = 256, .size0 = 32768},
+ {.offset = 1048576, .pitch = 256, .size0 = 16384},
+ {.offset = 1310720, .pitch = 256, .size0 = 16384},
+ {.offset = 1441792, .pitch = 256, .size0 = 16384},
+ {.offset = 1507328, .pitch = 256, .size0 = 16384},
+ {.offset = 1540096, .pitch = 256},
+ {.offset = 1556480, .pitch = 256},
+ {.offset = 1572864, .pitch = 256},
+ },
+ },
+ },
+
+ /* And increasing width makes it happen later. */
+ {
+ .format = PIPE_FORMAT_R9G9B9E5_FLOAT,
+ .is_3d = true,
+ .layout =
+ {
+ .tile_mode = TILE6_3,
+ .ubwc = false,
+ .width0 = 1024,
+ .height0 = 128,
+ .depth0 = 32,
+ .slices =
+ {
+ {.offset = 0, .pitch = 4096, .size0 = 524288},
+ {.offset = 16777216, .pitch = 2048, .size0 = 131072},
+ {.offset = 18874368, .pitch = 1024, .size0 = 32768},
+ {.offset = 19136512, .pitch = 512, .size0 = 8192},
+ {.offset = 19169280, .pitch = 256, .size0 = 4096},
+ {.offset = 19177472, .pitch = 256},
+ {.offset = 19181568, .pitch = 256},
+ {.offset = 19185664, .pitch = 256},
+ {.offset = 19189760, .pitch = 256},
+ {.offset = 19193856, .pitch = 256},
+ {.offset = 19197952, .pitch = 256},
+ },
+ },
+ },
+
+ /* NPOT height case that piglit was catching 3d texture failure in, we
+ * use a higher depth though to get more slice pitches detected from
+ * the blob.
+ */
+ {
+ .format = PIPE_FORMAT_R9G9B9E5_FLOAT,
+ .is_3d = true,
+ .layout =
+ {
+ .tile_mode = TILE6_3,
+ .ubwc = false,
+ .width0 = 128,
+ .height0 = 129,
+ .depth0 = 16,
+ .slices =
+ {
+ {.offset = 0, .pitch = 512, .size0 = 73728},
+ {.offset = 1179648, .pitch = 256, .size0 = 20480},
+ {.offset = 1343488, .pitch = 256, .size0 = 20480},
+ {.offset = 1425408, .pitch = 256, .size0 = 20480},
+ {.offset = 1466368, .pitch = 256},
+ {.offset = 1486848, .pitch = 256},
+ {.offset = 1507328, .pitch = 256},
+ {.offset = 1527808, .pitch = 256},
+ },
+ },
+ },
+
+ /* NPOT height case that my first 3d layout ideas failed on. */
+ {
+ .format = PIPE_FORMAT_R9G9B9E5_FLOAT,
+ .is_3d = true,
+ .layout =
+ {
+ .tile_mode = TILE6_3,
+ .ubwc = false,
+ .width0 = 128,
+ .height0 = 132,
+ .depth0 = 16,
+ .slices =
+ {
+ {.offset = 0, .pitch = 512, .size0 = 73728},
+ {.offset = 1179648, .pitch = 256, .size0 = 20480},
+ {.offset = 1343488, .pitch = 256, .size0 = 20480},
+ {.offset = 1425408, .pitch = 256, .size0 = 20480},
+ {.offset = 1466368, .pitch = 256},
+ {.offset = 1486848, .pitch = 256},
+ {.offset = 1507328, .pitch = 256},
+ {.offset = 1527808, .pitch = 256},
+ },
+ },
+ },
+
+ /* blob used MIN_LAYERSZ = 0x3000 here.
+ *
+ * This is an interesting case for 3d layout, since pitch stays NPOT for a while.
+ */
+ {
+ .format = PIPE_FORMAT_R9G9B9E5_FLOAT,
+ .is_3d = true,
+ .layout =
+ {
+ .tile_mode = TILE6_3,
+ .ubwc = false,
+ .width0 = 768,
+ .height0 = 32,
+ .depth0 = 128,
+ .slices =
+ {
+ {.offset = 0, .pitch = 3072, .size0 = 98304},
+ {.offset = 12582912, .pitch = 1536, .size0 = 24576},
+ {.offset = 14155776, .pitch = 768, .size0 = 12288},
+ {.offset = 14548992, .pitch = 512, .size0 = 12288},
+ {.offset = 14745600, .pitch = 256, .size0 = 12288},
+ {.offset = 14843904, .pitch = 256, .size0 = 12288},
+ {.offset = 14893056, .pitch = 256, .size0 = 12288},
+ {.offset = 14917632, .pitch = 256},
+ {.offset = 14929920, .pitch = 256},
+ {.offset = 14942208, .pitch = 256},
+ },
+ },
+ },
+
+ /* dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.rgba32f_rg11_eac.texture3d_to_texture2d */
+#if 0 /* XXX: We disagree with the blob about level 0 size0, but the testcase passes. */
+ {
+ .format = PIPE_FORMAT_R32G32B32A32_FLOAT,
+ .is_3d = true,
+ .layout =
+ {
+ .tile_mode = TILE6_3,
+ .ubwc = false,
+ .width0 = 129,
+ .height0 = 129,
+ .depth0 = 17,
+ .slices =
+ {
+ {.offset = 0, .pitch = 3072, .size0 = 524288},
+ {.offset = 8912896, .pitch = 2048, .size0 = 131072},
+ {.offset = 9961472, .pitch = 1024, .size0 = 32768},
+ {.offset = 10092544, .pitch = 1024, .size0 = 16384},
+ {.offset = 10125312, .pitch = 1024},
+ {.offset = 10141696, .pitch = 1024},
+ {.offset = 10158080, .pitch = 1024},
+ {.offset = 10174464, .pitch = 1024},
+ },
+ },
+ },
+#endif
+
+ /* Size minification issue found while looking at the above test. */
+ {
+ .format = PIPE_FORMAT_R32G32B32A32_FLOAT,
+ .is_3d = true,
+ .layout =
+ {
+ .tile_mode = TILE6_3,
+ .ubwc = false,
+ .width0 = 129,
+ .height0 = 9,
+ .depth0 = 8,
+ .slices =
+ {
+ {.offset = 0, .pitch = 3072, .size0 = 49152},
+ {.offset = 393216, .pitch = 2048, .size0 = 32768},
+ {.offset = 524288, .pitch = 1024, .size0 = 32768},
+ {.offset = 589824, .pitch = 1024},
+ {.offset = 622592, .pitch = 1024},
+ {.offset = 655360, .pitch = 1024},
+ {.offset = 688128, .pitch = 1024},
+ {.offset = 720896, .pitch = 1024},
+ },
+ },
+ },
+
};
int
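
The new 3d layout test cases follow a recurrence that is easy to verify by hand: each level's offset is the previous offset plus that level's size0 times its minified depth, with size0 bottoming out at a minimum layer size instead of shrinking further. A standalone check of that recurrence against the 32x32x32 R9G9B9E5 case added above (the clamp at 4096 is read off the table, not derived from the hardware rule):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   /* Values copied from the 32x32x32 testcase above. */
   const uint32_t offset[6] = {0, 262144, 327680, 360448, 376832, 385024};
   const uint32_t size0[5]  = {8192, 4096, 4096, 4096, 4096}; /* clamps at 4096 */
   uint32_t depth = 32;

   for (int level = 0; level < 5; level++) {
      uint32_t expected = offset[level] + size0[level] * depth;
      printf("level %d -> %d: %u (%s)\n", level, level + 1, expected,
             expected == offset[level + 1] ? "matches" : "MISMATCH");
      depth /= 2;   /* depth minifies with each level */
   }
   return 0;
}
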
diff --git a/lib/mesa/src/freedreno/ir3/ir3_dce.c b/lib/mesa/src/freedreno/ir3/ir3_dce.c
index 76298e64a..a3ddbe802 100644
--- a/lib/mesa/src/freedreno/ir3/ir3_dce.c
+++ b/lib/mesa/src/freedreno/ir3/ir3_dce.c
@@ -53,8 +53,10 @@ instr_dce(struct ir3_instruction *instr, bool falsedep)
if (ir3_instr_check_mark(instr))
return;
- if (writes_gpr(instr))
- mark_array_use(instr, instr->dsts[0]); /* dst */
+ foreach_dst (dst, instr) {
+ if (is_dest_gpr(dst))
+ mark_array_use(instr, dst);
+ }
foreach_src (reg, instr)
mark_array_use(instr, reg); /* src */
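
The ir3_dce change switches from marking only dsts[0] to walking every destination. As a generic illustration (not ir3's actual array/false-dependency bookkeeping), the toy mark phase below shows why an instruction with several destinations must have all of them consulted before its sources can be kept alive:

#include <stdbool.h>
#include <stdio.h>

/* Toy IR: each instruction may define several values and read several. */
struct toy_instr {
   int dsts[2], nr_dsts;
   int srcs[2], nr_srcs;
};

/* Mark phase of a simple DCE: an instruction is live if *any* of its
 * destinations is live, and a live instruction keeps its sources alive. */
static void mark_live(const struct toy_instr *instrs, int n, bool *value_live)
{
   bool changed = true;
   while (changed) {
      changed = false;
      for (int i = 0; i < n; i++) {
         bool live = false;
         for (int d = 0; d < instrs[i].nr_dsts; d++)
            live |= value_live[instrs[i].dsts[d]];
         if (!live)
            continue;
         for (int s = 0; s < instrs[i].nr_srcs; s++) {
            if (!value_live[instrs[i].srcs[s]]) {
               value_live[instrs[i].srcs[s]] = true;
               changed = true;
            }
         }
      }
   }
}

int main(void)
{
   /* instr 2 defines v2 and v3; only v3 is ultimately used via v4. */
   const struct toy_instr instrs[] = {
      { .dsts = {0}, .nr_dsts = 1, .nr_srcs = 0 },
      { .dsts = {1}, .nr_dsts = 1, .nr_srcs = 0 },
      { .dsts = {2, 3}, .nr_dsts = 2, .srcs = {0, 1}, .nr_srcs = 2 },
      { .dsts = {4}, .nr_dsts = 1, .srcs = {3}, .nr_srcs = 1 },
   };
   bool live[5] = { [4] = true };   /* value 4 is a shader output */

   mark_live(instrs, 4, live);
   for (int v = 0; v < 5; v++)
      printf("v%d %s\n", v, live[v] ? "live" : "dead");
   return 0;
}
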
diff --git a/lib/mesa/src/freedreno/ir3/ir3_delay.c b/lib/mesa/src/freedreno/ir3/ir3_delay.c
index 14bb403b9..054f4c831 100644
--- a/lib/mesa/src/freedreno/ir3/ir3_delay.c
+++ b/lib/mesa/src/freedreno/ir3/ir3_delay.c
@@ -30,19 +30,6 @@
*/
#define MAX_NOPS 6
-/* The soft delay for approximating the cost of (ss). On a6xx, it takes the
- * number of delay slots to get a SFU result back (ie. using nop's instead of
- * (ss) is:
- *
- * 8 - single warp
- * 9 - two warps
- * 10 - four warps
- *
- * and so on. Not quite sure where it tapers out (ie. how many warps share an
- * SFU unit). But 10 seems like a reasonable # to choose:
- */
-#define SOFT_SS_NOPS 10
-
/*
* Helpers to figure out the necessary delay slots between instructions. Used
* both in scheduling pass(es) and the final pass to insert any required nop's
@@ -76,11 +63,11 @@ ir3_delayslots(struct ir3_instruction *assigner,
if (writes_addr0(assigner) || writes_addr1(assigner))
return 6;
- if (soft && is_sfu(assigner))
- return SOFT_SS_NOPS;
+ if (soft && is_ss_producer(assigner))
+ return soft_ss_delay(assigner);
/* handled via sync flags: */
- if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner))
+ if (is_ss_producer(assigner) || is_sy_producer(assigner))
return 0;
/* As far as we know, shader outputs don't need any delay. */
@@ -89,7 +76,7 @@ ir3_delayslots(struct ir3_instruction *assigner,
/* assigner must be alu: */
if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) ||
- is_mem(consumer) || (assigner->dsts[0]->flags & IR3_REG_SHARED)) {
+ is_mem(consumer)) {
return 6;
} else {
/* In mergedregs mode, there is an extra 2-cycle penalty when half of
@@ -119,74 +106,6 @@ count_instruction(struct ir3_instruction *n)
(is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_B));
}
-static unsigned
-distance(struct ir3_block *block, struct ir3_instruction *instr, unsigned maxd)
-{
- unsigned d = 0;
-
- /* Note that this relies on incrementally building up the block's
- * instruction list.. but this is how scheduling and nopsched
- * work.
- */
- foreach_instr_rev (n, &block->instr_list) {
- if ((n == instr) || (d >= maxd))
- return MIN2(maxd, d + n->nop);
- if (count_instruction(n))
- d = MIN2(maxd, d + 1 + n->repeat + n->nop);
- }
-
- return maxd;
-}
-
-static unsigned
-delay_calc_srcn_prera(struct ir3_block *block, struct ir3_instruction *assigner,
- struct ir3_instruction *consumer, unsigned srcn)
-{
- unsigned delay = 0;
-
- if (assigner->opc == OPC_META_PHI)
- return 0;
-
- if (is_meta(assigner)) {
- foreach_src_n (src, n, assigner) {
- unsigned d;
-
- if (!src->def)
- continue;
-
- d = delay_calc_srcn_prera(block, src->def->instr, consumer, srcn);
- delay = MAX2(delay, d);
- }
- } else {
- delay = ir3_delayslots(assigner, consumer, srcn, false);
- delay -= distance(block, assigner, delay);
- }
-
- return delay;
-}
-
-/**
- * Calculate delay for instruction before register allocation, using SSA
- * source pointers. This can't handle inter-block dependencies.
- */
-unsigned
-ir3_delay_calc_prera(struct ir3_block *block, struct ir3_instruction *instr)
-{
- unsigned delay = 0;
-
- foreach_src_n (src, i, instr) {
- unsigned d = 0;
-
- if (src->def && src->def->instr->block == block) {
- d = delay_calc_srcn_prera(block, src->def->instr, instr, i);
- }
-
- delay = MAX2(delay, d);
- }
-
- return delay;
-}
-
/* Post-RA, we don't have arrays any more, so we have to be a bit careful here
* and have to handle relative accesses specially.
*/
@@ -207,35 +126,21 @@ post_ra_reg_num(struct ir3_register *reg)
return reg->num;
}
-static unsigned
-delay_calc_srcn_postra(struct ir3_instruction *assigner,
- struct ir3_instruction *consumer, unsigned assigner_n,
- unsigned consumer_n, bool soft, bool mergedregs)
+unsigned
+ir3_delayslots_with_repeat(struct ir3_instruction *assigner,
+ struct ir3_instruction *consumer,
+ unsigned assigner_n, unsigned consumer_n)
{
+ unsigned delay = ir3_delayslots(assigner, consumer, consumer_n, false);
+
struct ir3_register *src = consumer->srcs[consumer_n];
struct ir3_register *dst = assigner->dsts[assigner_n];
- bool mismatched_half =
- (src->flags & IR3_REG_HALF) != (dst->flags & IR3_REG_HALF);
- /* In the mergedregs case or when the register is a special register,
- * half-registers do not alias with full registers.
- */
- if ((!mergedregs || is_reg_special(src) || is_reg_special(dst)) &&
- mismatched_half)
- return 0;
+ if (assigner->repeat == 0 && consumer->repeat == 0)
+ return delay;
unsigned src_start = post_ra_reg_num(src) * reg_elem_size(src);
- unsigned src_end = src_start + post_ra_reg_elems(src) * reg_elem_size(src);
unsigned dst_start = post_ra_reg_num(dst) * reg_elem_size(dst);
- unsigned dst_end = dst_start + post_ra_reg_elems(dst) * reg_elem_size(dst);
-
- if (dst_start >= src_end || src_start >= dst_end)
- return 0;
-
- unsigned delay = ir3_delayslots(assigner, consumer, consumer_n, soft);
-
- if (assigner->repeat == 0 && consumer->repeat == 0)
- return delay;
/* If either side is a relative access, we can't really apply most of the
* reasoning below because we don't know which component aliases which.
@@ -250,6 +155,9 @@ delay_calc_srcn_postra(struct ir3_instruction *assigner,
if (assigner->opc == OPC_MOVMSK)
return delay;
+ bool mismatched_half =
+ (src->flags & IR3_REG_HALF) != (dst->flags & IR3_REG_HALF);
+
/* TODO: Handle the combination of (rpt) and different component sizes
* better like below. This complicates things significantly because the
* components don't line up.
@@ -303,10 +211,41 @@ delay_calc_srcn_postra(struct ir3_instruction *assigner,
}
static unsigned
-delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
- struct ir3_instruction *consumer, unsigned distance,
- bool soft, bool pred, bool mergedregs)
+delay_calc_srcn(struct ir3_instruction *assigner,
+ struct ir3_instruction *consumer, unsigned assigner_n,
+ unsigned consumer_n, bool mergedregs)
+{
+ struct ir3_register *src = consumer->srcs[consumer_n];
+ struct ir3_register *dst = assigner->dsts[assigner_n];
+ bool mismatched_half =
+ (src->flags & IR3_REG_HALF) != (dst->flags & IR3_REG_HALF);
+
+ /* In the mergedregs case or when the register is a special register,
+ * half-registers do not alias with full registers.
+ */
+ if ((!mergedregs || is_reg_special(src) || is_reg_special(dst)) &&
+ mismatched_half)
+ return 0;
+
+ unsigned src_start = post_ra_reg_num(src) * reg_elem_size(src);
+ unsigned src_end = src_start + post_ra_reg_elems(src) * reg_elem_size(src);
+ unsigned dst_start = post_ra_reg_num(dst) * reg_elem_size(dst);
+ unsigned dst_end = dst_start + post_ra_reg_elems(dst) * reg_elem_size(dst);
+
+ if (dst_start >= src_end || src_start >= dst_end)
+ return 0;
+
+ return ir3_delayslots_with_repeat(assigner, consumer, assigner_n, consumer_n);
+}
+
+static unsigned
+delay_calc(struct ir3_block *block, struct ir3_instruction *start,
+ struct ir3_instruction *consumer, unsigned distance,
+ regmask_t *in_mask, bool mergedregs)
{
+ regmask_t mask;
+ memcpy(&mask, in_mask, sizeof(mask));
+
unsigned delay = 0;
/* Search backwards starting at the instruction before start, unless it's
* NULL then search backwards from the block end.
@@ -318,7 +257,7 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
if (count_instruction(assigner))
distance += assigner->nop;
- if (distance + delay >= (soft ? SOFT_SS_NOPS : MAX_NOPS))
+ if (distance + delay >= MAX_NOPS)
return delay;
if (is_meta(assigner))
@@ -329,14 +268,17 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
foreach_dst_n (dst, dst_n, assigner) {
if (dst->wrmask == 0)
continue;
+ if (!regmask_get(&mask, dst))
+ continue;
foreach_src_n (src, src_n, consumer) {
if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST))
continue;
- unsigned src_delay = delay_calc_srcn_postra(
- assigner, consumer, dst_n, src_n, soft, mergedregs);
+ unsigned src_delay = delay_calc_srcn(
+ assigner, consumer, dst_n, src_n, mergedregs);
new_delay = MAX2(new_delay, src_delay);
}
+ regmask_clear(&mask, dst);
}
new_delay = new_delay > distance ? new_delay - distance : 0;
@@ -360,13 +302,13 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
* However any other recursion would be unnecessary.
*/
- if (pred && block->data != block) {
+ if (block->data != block) {
block->data = block;
for (unsigned i = 0; i < block->predecessors_count; i++) {
struct ir3_block *pred = block->predecessors[i];
- unsigned pred_delay = delay_calc_postra(pred, NULL, consumer, distance,
- soft, pred, mergedregs);
+ unsigned pred_delay = delay_calc(pred, NULL, consumer, distance,
+ &mask, mergedregs);
delay = MAX2(delay, pred_delay);
}
@@ -377,50 +319,19 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
}
/**
- * Calculate delay for post-RA scheduling based on physical registers but not
- * exact (i.e. don't recurse into predecessors, and make it possible to
- * estimate impact of sync flags).
- *
- * @soft: If true, add additional delay for situations where they
- * would not be strictly required because a sync flag would be
- * used (but scheduler would prefer to schedule some other
- * instructions first to avoid stalling on sync flag)
- * @mergedregs: True if mergedregs is enabled.
- */
-unsigned
-ir3_delay_calc_postra(struct ir3_block *block, struct ir3_instruction *instr,
- bool soft, bool mergedregs)
-{
- return delay_calc_postra(block, NULL, instr, 0, soft, false, mergedregs);
-}
-
-/**
* Calculate delay for nop insertion. This must exactly match hardware
* requirements, including recursing into predecessor blocks.
*/
unsigned
-ir3_delay_calc_exact(struct ir3_block *block, struct ir3_instruction *instr,
- bool mergedregs)
+ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr,
+ bool mergedregs)
{
- return delay_calc_postra(block, NULL, instr, 0, false, true, mergedregs);
-}
-
-/**
- * Remove nop instructions. The scheduler can insert placeholder nop's
- * so that ir3_delay_calc() can account for nop's that won't be needed
- * due to nop's triggered by a previous instruction. However, before
- * legalize, we want to remove these. The legalize pass can insert
- * some nop's if needed to hold (for example) sync flags. This final
- * remaining nops are inserted by legalize after this.
- */
-void
-ir3_remove_nops(struct ir3 *ir)
-{
- foreach_block (block, &ir->block_list) {
- foreach_instr_safe (instr, &block->instr_list) {
- if (instr->opc == OPC_NOP) {
- list_del(&instr->node);
- }
- }
+ regmask_t mask;
+ regmask_init(&mask, mergedregs);
+ foreach_src (src, instr) {
+ if (!(src->flags & (IR3_REG_IMMED | IR3_REG_CONST)))
+ regmask_set(&mask, src);
}
+
+ return delay_calc(block, NULL, instr, 0, &mask, mergedregs);
}
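
The rewritten delay calculation walks backwards from the consumer and now carries a register mask, so a register is dropped from consideration once its nearest writer has been found; that is also what makes the predecessor-block recursion safe. A much simplified standalone model, using a 64-bit bitmask for registers and a fixed per-producer latency in place of ir3_delayslots():

#include <stdint.h>
#include <stdio.h>

struct toy_instr {
   uint64_t writes;   /* bitmask of registers written */
   unsigned delay;    /* nops a consumer of those registers would need */
};

/* Walk the block backwards from its end; src_mask is the set of registers
 * the consumer reads.  Once a register has been resolved against its
 * nearest producer it is cleared from the mask, so older writers of the
 * same register are ignored. */
static unsigned delay_calc(const struct toy_instr *block, unsigned n,
                           uint64_t src_mask)
{
   unsigned distance = 0, needed = 0;

   for (int i = (int)n - 1; i >= 0 && src_mask; i--, distance++) {
      uint64_t hit = block[i].writes & src_mask;
      if (!hit)
         continue;
      unsigned d = block[i].delay > distance ? block[i].delay - distance : 0;
      if (d > needed)
         needed = d;
      src_mask &= ~hit;   /* nearest writer wins; don't look further back */
   }
   return needed;
}

int main(void)
{
   const struct toy_instr block[] = {
      { .writes = 1u << 0, .delay = 6 },  /* oldest */
      { .writes = 1u << 1, .delay = 6 },
      { .writes = 1u << 0, .delay = 2 },  /* newer write to r0 shadows the old one */
   };
   /* consumer reads r0 and r1 -> needs max(2-0, 6-1) = 5 nops */
   printf("%u nops needed\n", delay_calc(block, 3, (1u << 0) | (1u << 1)));
   return 0;
}
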
diff --git a/lib/mesa/src/freedreno/ir3/ir3_lexer.l b/lib/mesa/src/freedreno/ir3/ir3_lexer.l
index 2d5582e5b..52b977896 100644
--- a/lib/mesa/src/freedreno/ir3/ir3_lexer.l
+++ b/lib/mesa/src/freedreno/ir3/ir3_lexer.l
@@ -72,16 +72,6 @@ static int parse_reg(const char *str)
return num;
}
-static int parse_w(const char *str)
-{
- str++;
- unsigned num = strtol(str, NULL, 10);
- if ((num % 32) != 0)
- yy_fatal_error("w# must be multiple of 32");
- if (num < 32)
- yy_fatal_error("w# must be at least 32");
- return num / 32;
-}
%}
%option noyywrap
@@ -139,7 +129,7 @@ static int parse_w(const char *str)
"a0.x" return T_A0;
"a1.x" return T_A1;
"p0."[xyzw] ir3_yylval.num = parse_reg(yytext); return T_P0;
-"w"[0-9]+ ir3_yylval.num = parse_w(yytext); return T_W;
+"w"[0-9]+ ir3_yylval.num = strtol(yytext+1, NULL, 10); return T_W;
"s#"[0-9]+ ir3_yylval.num = strtol(yytext+2, NULL, 10); return T_SAMP;
"t#"[0-9]+ ir3_yylval.num = strtol(yytext+2, NULL, 10); return T_TEX;
@@ -167,6 +157,7 @@ static int parse_w(const char *str)
"stkr" return TOKEN(T_OP_STKR);
"xset" return TOKEN(T_OP_XSET);
"xclr" return TOKEN(T_OP_XCLR);
+"getlast" return TOKEN(T_OP_GETLAST);
"getone" return TOKEN(T_OP_GETONE);
"dbg" return TOKEN(T_OP_DBG);
"shps" return TOKEN(T_OP_SHPS);
@@ -228,6 +219,7 @@ static int parse_w(const char *str)
"shr.b" return TOKEN(T_OP_SHR_B);
"ashr.b" return TOKEN(T_OP_ASHR_B);
"bary.f" return TOKEN(T_OP_BARY_F);
+"flat.b" return TOKEN(T_OP_FLAT_B);
"mgen.b" return TOKEN(T_OP_MGEN_B);
"getbit.b" return TOKEN(T_OP_GETBIT_B);
"setrm" return TOKEN(T_OP_SETRM);
@@ -252,7 +244,15 @@ static int parse_w(const char *str)
"sel.f32" return TOKEN(T_OP_SEL_F32);
"sad.s16" return TOKEN(T_OP_SAD_S16);
"sad.s32" return TOKEN(T_OP_SAD_S32);
-"shlg.b16" return TOKEN(T_OP_SHLG_B16);
+"shrm" return TOKEN(T_OP_SHRM);
+"shlm" return TOKEN(T_OP_SHLM);
+"shrg" return TOKEN(T_OP_SHRG);
+"shlg" return TOKEN(T_OP_SHLG);
+"andg" return TOKEN(T_OP_ANDG);
+"dp2acc" return TOKEN(T_OP_DP2ACC);
+"dp4acc" return TOKEN(T_OP_DP4ACC);
+"wmm" return TOKEN(T_OP_WMM);
+"wmm.accu" return TOKEN(T_OP_WMM_ACCU);
/* category 4: */
"rcp" return TOKEN(T_OP_RCP);
@@ -295,6 +295,11 @@ static int parse_w(const char *str)
"dsypp.1" return TOKEN(T_OP_DSYPP_1);
"rgetpos" return TOKEN(T_OP_RGETPOS);
"rgetinfo" return TOKEN(T_OP_RGETINFO);
+"brcst.active" return TOKEN(T_OP_BRCST_A);
+"quad_shuffle.brcst" return TOKEN(T_OP_QSHUFFLE_BRCST);
+"quad_shuffle.horiz" return TOKEN(T_OP_QSHUFFLE_H);
+"quad_shuffle.vert" return TOKEN(T_OP_QSHUFFLE_V);
+"quad_shuffle.diag" return TOKEN(T_OP_QSHUFFLE_DIAG);
/* category 6: */
"ldg" return TOKEN(T_OP_LDG);
@@ -338,6 +343,29 @@ static int parse_w(const char *str)
"atomic.b.and" return TOKEN(T_OP_ATOMIC_B_AND);
"atomic.b.or" return TOKEN(T_OP_ATOMIC_B_OR);
"atomic.b.xor" return TOKEN(T_OP_ATOMIC_B_XOR);
+"atomic.s.add" return TOKEN(T_OP_ATOMIC_S_ADD);
+"atomic.s.sub" return TOKEN(T_OP_ATOMIC_S_SUB);
+"atomic.s.xchg" return TOKEN(T_OP_ATOMIC_S_XCHG);
+"atomic.s.inc" return TOKEN(T_OP_ATOMIC_S_INC);
+"atomic.s.dec" return TOKEN(T_OP_ATOMIC_S_DEC);
+"atomic.s.cmpxchg" return TOKEN(T_OP_ATOMIC_S_CMPXCHG);
+"atomic.s.min" return TOKEN(T_OP_ATOMIC_S_MIN);
+"atomic.s.max" return TOKEN(T_OP_ATOMIC_S_MAX);
+"atomic.s.and" return TOKEN(T_OP_ATOMIC_S_AND);
+"atomic.s.or" return TOKEN(T_OP_ATOMIC_S_OR);
+"atomic.s.xor" return TOKEN(T_OP_ATOMIC_S_XOR);
+"atomic.g.add" return TOKEN(T_OP_ATOMIC_G_ADD);
+"atomic.g.sub" return TOKEN(T_OP_ATOMIC_G_SUB);
+"atomic.g.xchg" return TOKEN(T_OP_ATOMIC_G_XCHG);
+"atomic.g.inc" return TOKEN(T_OP_ATOMIC_G_INC);
+"atomic.g.dec" return TOKEN(T_OP_ATOMIC_G_DEC);
+"atomic.g.cmpxchg" return TOKEN(T_OP_ATOMIC_G_CMPXCHG);
+"atomic.g.min" return TOKEN(T_OP_ATOMIC_G_MIN);
+"atomic.g.max" return TOKEN(T_OP_ATOMIC_G_MAX);
+"atomic.g.and" return TOKEN(T_OP_ATOMIC_G_AND);
+"atomic.g.or" return TOKEN(T_OP_ATOMIC_G_OR);
+"atomic.g.xor" return TOKEN(T_OP_ATOMIC_G_XOR);
+
"ldgb" return TOKEN(T_OP_LDGB);
"stgb" return TOKEN(T_OP_STGB);
"stib" return TOKEN(T_OP_STIB);
@@ -345,6 +373,8 @@ static int parse_w(const char *str)
"ldlv" return TOKEN(T_OP_LDLV);
"getspid" return TOKEN(T_OP_GETSPID);
"getwid" return TOKEN(T_OP_GETWID);
+"getfiberid" return TOKEN(T_OP_GETFIBERID);
+"stc" return TOKEN(T_OP_STC);
/* category 7: */
"bar" return TOKEN(T_OP_BAR);
@@ -362,6 +392,11 @@ static int parse_w(const char *str)
"untyped" return TOKEN(T_UNTYPED);
"typed" return TOKEN(T_TYPED);
+"unsigned" return TOKEN(T_UNSIGNED);
+"mixed" return TOKEN(T_MIXED);
+"low" return TOKEN(T_LOW);
+"high" return TOKEN(T_HIGH);
+
"1d" return TOKEN(T_1D);
"2d" return TOKEN(T_2D);
"3d" return TOKEN(T_3D);
@@ -379,6 +414,7 @@ static int parse_w(const char *str)
"p" return 'p';
"s2en" return TOKEN(T_S2EN);
"s" return 's';
+"k" return 'k';
"base"[0-9]+ ir3_yylval.num = strtol(yytext+4, NULL, 10); return T_BASE;
"offset"[0-9]+ ir3_yylval.num = strtol(yytext+6, NULL, 10); return T_OFFSET;
"uniform" return T_UNIFORM;
diff --git a/lib/mesa/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c b/lib/mesa/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c
deleted file mode 100644
index 37a3dcb26..000000000
--- a/lib/mesa/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright © 2017 Ilia Mirkin
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "ir3_nir.h"
-#include "compiler/nir/nir_builder.h"
-
-/* A4XX has a broken GATHER4 operation. It performs the texture swizzle on the
- * gather results, rather than before. As a result, it must be emulated with
- * direct texture calls.
- */
-
-static bool
-lower_tg4(nir_block *block, nir_builder *b, void *mem_ctx)
-{
- bool progress = false;
-
- static const int offsets[3][2] = { {0, 1}, {1, 1}, {1, 0} };
-
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_tex)
- continue;
-
- nir_tex_instr *tg4 = (nir_tex_instr *)instr;
-
- if (tg4->op != nir_texop_tg4)
- continue;
-
- b->cursor = nir_before_instr(&tg4->instr);
-
- nir_ssa_def *results[4];
- int offset_index = nir_tex_instr_src_index(tg4, nir_tex_src_offset);
- for (int i = 0; i < 4; i++) {
- int num_srcs = tg4->num_srcs + 1 /* lod */;
- if (offset_index < 0 && i < 3)
- num_srcs++;
-
- nir_tex_instr *tex = nir_tex_instr_create(b->shader, num_srcs);
- tex->op = nir_texop_txl;
- tex->sampler_dim = tg4->sampler_dim;
- tex->coord_components = tg4->coord_components;
- tex->is_array = tg4->is_array;
- tex->is_shadow = tg4->is_shadow;
- tex->is_new_style_shadow = tg4->is_new_style_shadow;
- tex->texture_index = tg4->texture_index;
- tex->sampler_index = tg4->sampler_index;
- tex->dest_type = tg4->dest_type;
-
- for (int j = 0; j < tg4->num_srcs; j++) {
- nir_src_copy(&tex->src[j].src, &tg4->src[j].src, tex);
- tex->src[j].src_type = tg4->src[j].src_type;
- }
- if (i != 3) {
- nir_ssa_def *offset =
- nir_vec2(b, nir_imm_int(b, offsets[i][0]),
- nir_imm_int(b, offsets[i][1]));
- if (offset_index < 0) {
- tex->src[tg4->num_srcs].src = nir_src_for_ssa(offset);
- tex->src[tg4->num_srcs].src_type = nir_tex_src_offset;
- } else {
- assert(nir_tex_instr_src_size(tex, offset_index) == 2);
- nir_ssa_def *orig = nir_ssa_for_src(
- b, tex->src[offset_index].src, 2);
- tex->src[offset_index].src =
- nir_src_for_ssa(nir_iadd(b, orig, offset));
- }
- }
- tex->src[num_srcs - 1].src = nir_src_for_ssa(nir_imm_float(b, 0));
- tex->src[num_srcs - 1].src_type = nir_tex_src_lod;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest,
- nir_tex_instr_dest_size(tex), 32, NULL);
- nir_builder_instr_insert(b, &tex->instr);
-
- results[i] = nir_channel(b, &tex->dest.ssa, tg4->component);
- }
-
- nir_ssa_def *result = nir_vec4(b, results[0], results[1], results[2], results[3]);
- nir_ssa_def_rewrite_uses(&tg4->dest.ssa, nir_src_for_ssa(result));
-
- nir_instr_remove(&tg4->instr);
-
- progress = true;
- }
-
- return progress;
-}
-
-static bool
-lower_tg4_func(nir_function_impl *impl)
-{
- void *mem_ctx = ralloc_parent(impl);
- nir_builder b;
- nir_builder_init(&b, impl);
-
- bool progress = false;
- nir_foreach_block_safe(block, impl) {
- progress |= lower_tg4(block, &b, mem_ctx);
- }
-
- if (progress)
- nir_metadata_preserve(impl, nir_metadata_block_index |
- nir_metadata_dominance);
-
- return progress;
-}
-
-bool
-ir3_nir_lower_tg4_to_tex(nir_shader *shader)
-{
- bool progress = false;
-
- nir_foreach_function(function, shader) {
- if (function->impl)
- progress |= lower_tg4_func(function->impl);
- }
-
- return progress;
-}
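
The deleted pass emulated a4xx's broken GATHER4 by issuing four point-sampled txl fetches, three at offsets (0,1), (1,1) and (1,0) plus one un-offset fetch, and packing the chosen component of each into a vec4. The sketch below only illustrates that footprint and result order on a CPU-side 2D array; it is not driver code and ignores wrap/clamp modes:

#include <stdio.h>

#define W 4
#define H 4

/* Gather the four texels a gather4 of integer coordinate (x, y) returns,
 * in the same order the deleted pass produced them: offsets {0,1}, {1,1},
 * {1,0}, then the un-offset fetch. */
static void gather4(const float tex[H][W], int x, int y, float out[4])
{
   const int offsets[4][2] = { {0, 1}, {1, 1}, {1, 0}, {0, 0} };

   for (int i = 0; i < 4; i++) {
      int sx = x + offsets[i][0];
      int sy = y + offsets[i][1];
      out[i] = tex[sy][sx];
   }
}

int main(void)
{
   float tex[H][W];
   for (int y = 0; y < H; y++)
      for (int x = 0; x < W; x++)
         tex[y][x] = y * W + x;

   float g[4];
   gather4(tex, 1, 1, g);
   printf("%.0f %.0f %.0f %.0f\n", g[0], g[1], g[2], g[3]);  /* 9 10 6 5 */
   return 0;
}
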
diff --git a/lib/mesa/src/freedreno/ir3/ir3_parser.y b/lib/mesa/src/freedreno/ir3/ir3_parser.y
index acd94b35a..fd29c639d 100644
--- a/lib/mesa/src/freedreno/ir3/ir3_parser.y
+++ b/lib/mesa/src/freedreno/ir3/ir3_parser.y
@@ -399,6 +399,7 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_OP_STKR
%token <tok> T_OP_XSET
%token <tok> T_OP_XCLR
+%token <tok> T_OP_GETLAST
%token <tok> T_OP_GETONE
%token <tok> T_OP_DBG
%token <tok> T_OP_SHPS
@@ -458,6 +459,7 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_OP_SHR_B
%token <tok> T_OP_ASHR_B
%token <tok> T_OP_BARY_F
+%token <tok> T_OP_FLAT_B
%token <tok> T_OP_MGEN_B
%token <tok> T_OP_GETBIT_B
%token <tok> T_OP_SETRM
@@ -482,7 +484,15 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_OP_SEL_F32
%token <tok> T_OP_SAD_S16
%token <tok> T_OP_SAD_S32
-%token <tok> T_OP_SHLG_B16
+%token <tok> T_OP_SHRM
+%token <tok> T_OP_SHLM
+%token <tok> T_OP_SHRG
+%token <tok> T_OP_SHLG
+%token <tok> T_OP_ANDG
+%token <tok> T_OP_DP2ACC
+%token <tok> T_OP_DP4ACC
+%token <tok> T_OP_WMM
+%token <tok> T_OP_WMM_ACCU
/* category 4: */
%token <tok> T_OP_RCP
@@ -525,6 +535,11 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_OP_DSYPP_1
%token <tok> T_OP_RGETPOS
%token <tok> T_OP_RGETINFO
+%token <tok> T_OP_BRCST_A
+%token <tok> T_OP_QSHUFFLE_BRCST
+%token <tok> T_OP_QSHUFFLE_H
+%token <tok> T_OP_QSHUFFLE_V
+%token <tok> T_OP_QSHUFFLE_DIAG
/* category 6: */
%token <tok> T_OP_LDG
@@ -568,6 +583,28 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_OP_ATOMIC_B_AND
%token <tok> T_OP_ATOMIC_B_OR
%token <tok> T_OP_ATOMIC_B_XOR
+%token <tok> T_OP_ATOMIC_S_ADD
+%token <tok> T_OP_ATOMIC_S_SUB
+%token <tok> T_OP_ATOMIC_S_XCHG
+%token <tok> T_OP_ATOMIC_S_INC
+%token <tok> T_OP_ATOMIC_S_DEC
+%token <tok> T_OP_ATOMIC_S_CMPXCHG
+%token <tok> T_OP_ATOMIC_S_MIN
+%token <tok> T_OP_ATOMIC_S_MAX
+%token <tok> T_OP_ATOMIC_S_AND
+%token <tok> T_OP_ATOMIC_S_OR
+%token <tok> T_OP_ATOMIC_S_XOR
+%token <tok> T_OP_ATOMIC_G_ADD
+%token <tok> T_OP_ATOMIC_G_SUB
+%token <tok> T_OP_ATOMIC_G_XCHG
+%token <tok> T_OP_ATOMIC_G_INC
+%token <tok> T_OP_ATOMIC_G_DEC
+%token <tok> T_OP_ATOMIC_G_CMPXCHG
+%token <tok> T_OP_ATOMIC_G_MIN
+%token <tok> T_OP_ATOMIC_G_MAX
+%token <tok> T_OP_ATOMIC_G_AND
+%token <tok> T_OP_ATOMIC_G_OR
+%token <tok> T_OP_ATOMIC_G_XOR
%token <tok> T_OP_LDGB
%token <tok> T_OP_STGB
%token <tok> T_OP_STIB
@@ -575,6 +612,8 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_OP_LDLV
%token <tok> T_OP_GETSPID
%token <tok> T_OP_GETWID
+%token <tok> T_OP_GETFIBERID
+%token <tok> T_OP_STC
/* category 7: */
%token <tok> T_OP_BAR
@@ -593,6 +632,11 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_UNTYPED
%token <tok> T_TYPED
+%token <tok> T_MIXED
+%token <tok> T_UNSIGNED
+%token <tok> T_LOW
+%token <tok> T_HIGH
+
%token <tok> T_1D
%token <tok> T_2D
%token <tok> T_3D
@@ -746,7 +790,7 @@ iflag: T_SY { iflags.flags |= IR3_INSTR_SY; }
iflags:
| iflag iflags
-instrs: instr instrs
+instrs: instrs instr
| instr
instr: iflags cat0_instr
@@ -800,6 +844,7 @@ cat0_instr: T_OP_NOP { new_instr(OPC_NOP); }
| T_OP_PREDT { new_instr(OPC_PREDT); } cat0_src1
| T_OP_PREDF { new_instr(OPC_PREDF); } cat0_src1
| T_OP_PREDE { new_instr(OPC_PREDE); }
+| T_OP_GETLAST '.' T_W { new_instr(OPC_GETLAST); } cat0_immed
cat1_opc: T_OP_MOV '.' T_CAT1_TYPE_TYPE {
parse_type_type(new_instr(OPC_MOV), $3);
@@ -815,9 +860,16 @@ cat1_movmsk: T_OP_MOVMSK '.' T_W {
new_instr(OPC_MOVMSK);
instr->cat1.src_type = TYPE_U32;
instr->cat1.dst_type = TYPE_U32;
- instr->repeat = $3 - 1;
} dst_reg {
- instr->dsts[0]->wrmask = (1 << $3) - 1;
+ if (($3 % 32) != 0)
+ yyerror("w# must be multiple of 32");
+ if ($3 < 32)
+ yyerror("w# must be at least 32");
+
+ int num = $3 / 32;
+
+ instr->repeat = num - 1;
+ instr->dsts[0]->wrmask = (1 << num) - 1;
}
cat1_mova1: T_OP_MOVA1 T_A1 ',' {
@@ -894,6 +946,7 @@ cat2_opc_2src: T_OP_ADD_F { new_instr(OPC_ADD_F); }
| T_OP_SHR_B { new_instr(OPC_SHR_B); }
| T_OP_ASHR_B { new_instr(OPC_ASHR_B); }
| T_OP_BARY_F { new_instr(OPC_BARY_F); }
+| T_OP_FLAT_B { new_instr(OPC_FLAT_B); }
| T_OP_MGEN_B { new_instr(OPC_MGEN_B); }
| T_OP_GETBIT_B { new_instr(OPC_GETBIT_B); }
| T_OP_SHB { new_instr(OPC_SHB); }
@@ -910,6 +963,12 @@ cat2_instr: cat2_opc_1src dst_reg ',' src_reg_or_const_or_rel_or_imm
| cat2_opc_2src_cnd '.' cond dst_reg ',' src_reg_or_const_or_rel_or_imm ',' src_reg_or_const_or_rel_or_imm
| cat2_opc_2src dst_reg ',' src_reg_or_const_or_rel_or_imm ',' src_reg_or_const_or_rel_or_imm
+cat3_dp_signedness:'.' T_MIXED { instr->cat3.signedness = IR3_SRC_MIXED; }
+| '.' T_UNSIGNED{ instr->cat3.signedness = IR3_SRC_UNSIGNED; }
+
+cat3_dp_pack: '.' T_LOW { instr->cat3.packed = IR3_SRC_PACKED_LOW; }
+| '.' T_HIGH { instr->cat3.packed = IR3_SRC_PACKED_HIGH; }
+
cat3_opc: T_OP_MAD_U16 { new_instr(OPC_MAD_U16); }
| T_OP_MADSH_U16 { new_instr(OPC_MADSH_U16); }
| T_OP_MAD_S16 { new_instr(OPC_MAD_S16); }
@@ -927,8 +986,22 @@ cat3_opc: T_OP_MAD_U16 { new_instr(OPC_MAD_U16); }
| T_OP_SAD_S16 { new_instr(OPC_SAD_S16); }
| T_OP_SAD_S32 { new_instr(OPC_SAD_S32); }
+cat3_imm_reg_opc: T_OP_SHRM { new_instr(OPC_SHRM); }
+| T_OP_SHLM { new_instr(OPC_SHLM); }
+| T_OP_SHRG { new_instr(OPC_SHRG); }
+| T_OP_SHLG { new_instr(OPC_SHLG); }
+| T_OP_ANDG { new_instr(OPC_ANDG); }
+
+cat3_wmm: T_OP_WMM { new_instr(OPC_WMM); }
+| T_OP_WMM_ACCU { new_instr(OPC_WMM_ACCU); }
+
+cat3_dp: T_OP_DP2ACC { new_instr(OPC_DP2ACC); }
+| T_OP_DP4ACC { new_instr(OPC_DP4ACC); }
+
cat3_instr: cat3_opc dst_reg ',' src_reg_or_const_or_rel ',' src_reg_or_const ',' src_reg_or_const_or_rel
-| T_OP_SHLG_B16 { new_instr(OPC_SHLG_B16); } dst_reg ',' src_reg_or_rel_or_imm ',' src_reg_or_const ',' src_reg_or_rel_or_imm
+| cat3_imm_reg_opc dst_reg ',' src_reg_or_rel_or_imm ',' src_reg_or_const ',' src_reg_or_rel_or_imm
+| cat3_wmm dst_reg ',' src_reg_gpr ',' src_reg ',' immediate
+| cat3_dp cat3_dp_signedness cat3_dp_pack dst_reg ',' src_reg_or_rel_or_imm ',' src_reg_or_const ',' src_reg_or_rel_or_imm
cat4_opc: T_OP_RCP { new_instr(OPC_RCP); }
| T_OP_RSQ { new_instr(OPC_RSQ); }
@@ -972,6 +1045,11 @@ cat5_opc: T_OP_ISAM { new_instr(OPC_ISAM); }
| T_OP_SAMGP3 { new_instr(OPC_SAMGP3); }
| T_OP_RGETPOS { new_instr(OPC_RGETPOS); }
| T_OP_RGETINFO { new_instr(OPC_RGETINFO); }
+| T_OP_BRCST_A { new_instr(OPC_BRCST_ACTIVE); }
+| T_OP_QSHUFFLE_BRCST { new_instr(OPC_QUAD_SHUFFLE_BRCST); }
+| T_OP_QSHUFFLE_H { new_instr(OPC_QUAD_SHUFFLE_HORIZ); }
+| T_OP_QSHUFFLE_V { new_instr(OPC_QUAD_SHUFFLE_VERT); }
+| T_OP_QSHUFFLE_DIAG { new_instr(OPC_QUAD_SHUFFLE_DIAG); }
cat5_flag: '.' T_3D { instr->flags |= IR3_INSTR_3D; }
| '.' 'a' { instr->flags |= IR3_INSTR_A; }
@@ -979,13 +1057,15 @@ cat5_flag: '.' T_3D { instr->flags |= IR3_INSTR_3D; }
| '.' 'p' { instr->flags |= IR3_INSTR_P; }
| '.' 's' { instr->flags |= IR3_INSTR_S; }
| '.' T_S2EN { instr->flags |= IR3_INSTR_S2EN; }
+| '.' T_UNIFORM { }
| '.' T_NONUNIFORM { instr->flags |= IR3_INSTR_NONUNIF; }
| '.' T_BASE { instr->flags |= IR3_INSTR_B; instr->cat5.tex_base = $2; }
+| '.' T_W { instr->cat5.cluster_size = $2; }
cat5_flags:
| cat5_flag cat5_flags
cat5_samp: T_SAMP { instr->cat5.samp = $1; }
-cat5_tex: T_TEX { if (instr->flags & IR3_INSTR_B) instr->cat5.samp |= ($1 << 4); else instr->cat5.tex = $1; }
+cat5_tex: T_TEX { instr->cat5.tex = $1; }
cat5_type: '(' type ')' { instr->cat5.type = $2; }
cat5_a1: src_reg { instr->flags |= IR3_INSTR_A1EN; }
@@ -1018,7 +1098,7 @@ cat6_imm_offset: offset { new_src(0, IR3_REG_IMMED)->iim_val = $1; }
cat6_offset: cat6_imm_offset
| '+' src
cat6_dst_offset: offset { instr->cat6.dst_offset = $1; }
-| '+' src { instr->flags |= IR3_INSTR_G; }
+| '+' src
cat6_immed: integer { instr->cat6.iim_val = $1; }
@@ -1066,14 +1146,39 @@ cat6_atomic_opc: T_OP_ATOMIC_ADD { new_instr(OPC_ATOMIC_ADD); }
| T_OP_ATOMIC_OR { new_instr(OPC_ATOMIC_OR); }
| T_OP_ATOMIC_XOR { new_instr(OPC_ATOMIC_XOR); }
-cat6_atomic_g: cat6_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' 'g' '[' cat6_reg_or_immed ']' ',' src ',' src ',' src {
- instr->flags |= IR3_INSTR_G;
- }
+cat6_a3xx_atomic_opc: T_OP_ATOMIC_S_ADD { new_instr(OPC_ATOMIC_S_ADD); }
+| T_OP_ATOMIC_S_SUB { new_instr(OPC_ATOMIC_S_SUB); }
+| T_OP_ATOMIC_S_XCHG { new_instr(OPC_ATOMIC_S_XCHG); }
+| T_OP_ATOMIC_S_INC { new_instr(OPC_ATOMIC_S_INC); }
+| T_OP_ATOMIC_S_DEC { new_instr(OPC_ATOMIC_S_DEC); }
+| T_OP_ATOMIC_S_CMPXCHG { new_instr(OPC_ATOMIC_S_CMPXCHG); }
+| T_OP_ATOMIC_S_MIN { new_instr(OPC_ATOMIC_S_MIN); }
+| T_OP_ATOMIC_S_MAX { new_instr(OPC_ATOMIC_S_MAX); }
+| T_OP_ATOMIC_S_AND { new_instr(OPC_ATOMIC_S_AND); }
+| T_OP_ATOMIC_S_OR { new_instr(OPC_ATOMIC_S_OR); }
+| T_OP_ATOMIC_S_XOR { new_instr(OPC_ATOMIC_S_XOR); }
+
+cat6_a6xx_atomic_opc: T_OP_ATOMIC_G_ADD { new_instr(OPC_ATOMIC_G_ADD); }
+| T_OP_ATOMIC_G_SUB { new_instr(OPC_ATOMIC_G_SUB); }
+| T_OP_ATOMIC_G_XCHG { new_instr(OPC_ATOMIC_G_XCHG); }
+| T_OP_ATOMIC_G_INC { new_instr(OPC_ATOMIC_G_INC); }
+| T_OP_ATOMIC_G_DEC { new_instr(OPC_ATOMIC_G_DEC); }
+| T_OP_ATOMIC_G_CMPXCHG { new_instr(OPC_ATOMIC_G_CMPXCHG); }
+| T_OP_ATOMIC_G_MIN { new_instr(OPC_ATOMIC_G_MIN); }
+| T_OP_ATOMIC_G_MAX { new_instr(OPC_ATOMIC_G_MAX); }
+| T_OP_ATOMIC_G_AND { new_instr(OPC_ATOMIC_G_AND); }
+| T_OP_ATOMIC_G_OR { new_instr(OPC_ATOMIC_G_OR); }
+| T_OP_ATOMIC_G_XOR { new_instr(OPC_ATOMIC_G_XOR); }
+
+cat6_a3xx_atomic_s: cat6_a3xx_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' 'g' '[' cat6_reg_or_immed ']' ',' src ',' src ',' src
+
+cat6_a6xx_atomic_g: cat6_a6xx_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' src ',' src
cat6_atomic_l: cat6_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'l' dst_reg ',' 'l' '[' cat6_reg_or_immed ']' ',' src
-cat6_atomic: cat6_atomic_g
-| cat6_atomic_l
+cat6_atomic: cat6_atomic_l
+| cat6_a3xx_atomic_s
+| cat6_a6xx_atomic_g
cat6_ibo_opc_1src: T_OP_RESINFO { new_instr(OPC_RESINFO); }
@@ -1087,6 +1192,7 @@ cat6_ibo: cat6_ibo_opc_1src cat6_type cat6_dim dst_reg ',' 'g' '[' cat6
cat6_id_opc:
T_OP_GETSPID { new_instr(OPC_GETSPID); }
| T_OP_GETWID { new_instr(OPC_GETWID); }
+| T_OP_GETFIBERID { new_instr(OPC_GETFIBERID); }
cat6_id: cat6_id_opc cat6_type dst_reg
@@ -1102,17 +1208,17 @@ cat6_reg_or_immed: src
cat6_bindless_ibo_opc_1src: T_OP_RESINFO_B { new_instr(OPC_RESINFO); }
-cat6_bindless_ibo_opc_2src: T_OP_ATOMIC_B_ADD { new_instr(OPC_ATOMIC_ADD)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_SUB { new_instr(OPC_ATOMIC_SUB)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_XCHG { new_instr(OPC_ATOMIC_XCHG)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_INC { new_instr(OPC_ATOMIC_INC)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_DEC { new_instr(OPC_ATOMIC_DEC)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_CMPXCHG { new_instr(OPC_ATOMIC_CMPXCHG)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_MIN { new_instr(OPC_ATOMIC_MIN)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_MAX { new_instr(OPC_ATOMIC_MAX)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_AND { new_instr(OPC_ATOMIC_AND)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_OR { new_instr(OPC_ATOMIC_OR)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_XOR { new_instr(OPC_ATOMIC_XOR)->flags |= IR3_INSTR_G; dummy_dst(); }
+cat6_bindless_ibo_opc_2src: T_OP_ATOMIC_B_ADD { new_instr(OPC_ATOMIC_B_ADD); dummy_dst(); }
+| T_OP_ATOMIC_B_SUB { new_instr(OPC_ATOMIC_B_SUB); dummy_dst(); }
+| T_OP_ATOMIC_B_XCHG { new_instr(OPC_ATOMIC_B_XCHG); dummy_dst(); }
+| T_OP_ATOMIC_B_INC { new_instr(OPC_ATOMIC_B_INC); dummy_dst(); }
+| T_OP_ATOMIC_B_DEC { new_instr(OPC_ATOMIC_B_DEC); dummy_dst(); }
+| T_OP_ATOMIC_B_CMPXCHG { new_instr(OPC_ATOMIC_B_CMPXCHG); dummy_dst(); }
+| T_OP_ATOMIC_B_MIN { new_instr(OPC_ATOMIC_B_MIN); dummy_dst(); }
+| T_OP_ATOMIC_B_MAX { new_instr(OPC_ATOMIC_B_MAX); dummy_dst(); }
+| T_OP_ATOMIC_B_AND { new_instr(OPC_ATOMIC_B_AND); dummy_dst(); }
+| T_OP_ATOMIC_B_OR { new_instr(OPC_ATOMIC_B_OR); dummy_dst(); }
+| T_OP_ATOMIC_B_XOR { new_instr(OPC_ATOMIC_B_XOR); dummy_dst(); }
| T_OP_STIB_B { new_instr(OPC_STIB); dummy_dst(); }
cat6_bindless_ibo_opc_2src_dst: T_OP_LDIB_B { new_instr(OPC_LDIB); }
@@ -1123,13 +1229,23 @@ cat6_bindless_ibo: cat6_bindless_ibo_opc_1src cat6_typed cat6_dim cat6_type '.'
cat6_bindless_ldc_opc: T_OP_LDC { new_instr(OPC_LDC); }
-cat6_bindless_ldc: cat6_bindless_ldc_opc '.' T_OFFSET '.' cat6_immed '.' cat6_bindless_mode dst_reg ',' cat6_reg_or_immed ',' cat6_reg_or_immed {
- instr->cat6.d = $3;
+/* This is separated from the opcode to avoid lookahead/shift-reduce conflicts */
+cat6_bindless_ldc_middle:
+ T_OFFSET '.' cat6_immed '.' cat6_bindless_mode dst_reg { instr->cat6.d = $1; }
+| cat6_immed '.' 'k' '.' cat6_bindless_mode 'c' '[' T_A1 ']' { instr->opc = OPC_LDC_K; }
+
+cat6_bindless_ldc: cat6_bindless_ldc_opc '.' cat6_bindless_ldc_middle ',' cat6_reg_or_immed ',' cat6_reg_or_immed {
instr->cat6.type = TYPE_U32;
/* TODO cleanup ir3 src order: */
swap(instr->srcs[0], instr->srcs[1]);
}
+stc_dst: integer { new_src(0, IR3_REG_IMMED)->iim_val = $1; }
+| T_A1 { new_src(0, IR3_REG_IMMED)->iim_val = 0; instr->flags |= IR3_INSTR_A1EN; }
+| T_A1 '+' integer { new_src(0, IR3_REG_IMMED)->iim_val = $3; instr->flags |= IR3_INSTR_A1EN; }
+
+cat6_stc: T_OP_STC { new_instr(OPC_STC); } cat6_type 'c' '[' stc_dst ']' ',' src_reg ',' cat6_immed
+
cat6_todo: T_OP_G2L { new_instr(OPC_G2L); }
| T_OP_L2G { new_instr(OPC_L2G); }
| T_OP_RESFMT { new_instr(OPC_RESFMT); }
@@ -1144,6 +1260,7 @@ cat6_instr: cat6_load
| cat6_id
| cat6_bindless_ldc
| cat6_bindless_ibo
+| cat6_stc
| cat6_todo
cat7_scope: '.' 'w' { instr->cat7.w = true; }
@@ -1195,6 +1312,9 @@ src_reg_flags: src_reg_flag
src_reg: src
| src_reg_flags src
+src_reg_gpr: src_reg
+| relative_gpr_src
+
src_const: const
| src_reg_flags const
diff --git a/lib/mesa/src/freedreno/ir3/ir3_postsched.c b/lib/mesa/src/freedreno/ir3/ir3_postsched.c
index 507302a00..39de84add 100644
--- a/lib/mesa/src/freedreno/ir3/ir3_postsched.c
+++ b/lib/mesa/src/freedreno/ir3/ir3_postsched.c
@@ -68,8 +68,10 @@ struct ir3_postsched_ctx {
struct list_head unscheduled_list; /* unscheduled instructions */
- int sfu_delay;
- int tex_delay;
+ unsigned ip;
+
+ int ss_delay;
+ int sy_delay;
};
struct ir3_postsched_node {
@@ -77,7 +79,9 @@ struct ir3_postsched_node {
struct ir3_instruction *instr;
bool partially_evaluated_path;
- bool has_tex_src, has_sfu_src;
+ unsigned earliest_ip;
+
+ bool has_sy_src, has_ss_src;
unsigned delay;
unsigned max_delay;
@@ -87,17 +91,17 @@ struct ir3_postsched_node {
list_for_each_entry (struct ir3_postsched_node, __n, __list, dag.link)
static bool
-has_tex_src(struct ir3_instruction *instr)
+has_sy_src(struct ir3_instruction *instr)
{
struct ir3_postsched_node *node = instr->data;
- return node->has_tex_src;
+ return node->has_sy_src;
}
static bool
-has_sfu_src(struct ir3_instruction *instr)
+has_ss_src(struct ir3_instruction *instr)
{
struct ir3_postsched_node *node = instr->data;
- return node->has_sfu_src;
+ return node->has_ss_src;
}
static void
@@ -111,28 +115,45 @@ schedule(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
di(instr, "schedule");
- list_addtail(&instr->node, &instr->block->instr_list);
+ bool counts_for_delay = is_alu(instr) || is_flow(instr);
+
+ unsigned delay_cycles = counts_for_delay ? 1 + instr->repeat : 0;
struct ir3_postsched_node *n = instr->data;
+
+ /* We insert any nops needed to get to earliest_ip, then advance the IP
+ * by delay_cycles when scheduling the instruction.
+ */
+ ctx->ip = MAX2(ctx->ip, n->earliest_ip) + delay_cycles;
+
+ util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
+ unsigned delay = (unsigned)(uintptr_t)edge->data;
+ struct ir3_postsched_node *child =
+ container_of(edge->child, struct ir3_postsched_node, dag);
+ child->earliest_ip = MAX2(child->earliest_ip, ctx->ip + delay);
+ }
+
+ list_addtail(&instr->node, &instr->block->instr_list);
+
dag_prune_head(ctx->dag, &n->dag);
if (is_meta(instr) && (instr->opc != OPC_META_TEX_PREFETCH))
return;
- if (is_sfu(instr)) {
- ctx->sfu_delay = 8;
- } else if (has_sfu_src(instr)) {
- ctx->sfu_delay = 0;
- } else if (ctx->sfu_delay > 0) {
- ctx->sfu_delay--;
+ if (is_ss_producer(instr)) {
+ ctx->ss_delay = soft_ss_delay(instr);
+ } else if (has_ss_src(instr)) {
+ ctx->ss_delay = 0;
+ } else if (ctx->ss_delay > 0) {
+ ctx->ss_delay--;
}
- if (is_tex_or_prefetch(instr)) {
- ctx->tex_delay = 10;
- } else if (has_tex_src(instr)) {
- ctx->tex_delay = 0;
- } else if (ctx->tex_delay > 0) {
- ctx->tex_delay--;
+ if (is_sy_producer(instr)) {
+ ctx->sy_delay = soft_sy_delay(instr, ctx->block->shader);
+ } else if (has_sy_src(instr)) {
+ ctx->sy_delay = 0;
+ } else if (ctx->sy_delay > 0) {
+ ctx->sy_delay--;
}
}
@@ -154,25 +175,26 @@ dump_state(struct ir3_postsched_ctx *ctx)
}
}
-/* Determine if this is an instruction that we'd prefer not to schedule
- * yet, in order to avoid an (ss) sync. This is limited by the sfu_delay
- * counter, ie. the more cycles it has been since the last SFU, the less
- * costly a sync would be.
- */
-static bool
-would_sync(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
+static unsigned
+node_delay(struct ir3_postsched_ctx *ctx, struct ir3_postsched_node *n)
{
- if (ctx->sfu_delay) {
- if (has_sfu_src(instr))
- return true;
- }
+ return MAX2(n->earliest_ip, ctx->ip) - ctx->ip;
+}
- if (ctx->tex_delay) {
- if (has_tex_src(instr))
- return true;
- }
+static unsigned
+node_delay_soft(struct ir3_postsched_ctx *ctx, struct ir3_postsched_node *n)
+{
+ unsigned delay = node_delay(ctx, n);
+
+ /* This takes into account that when we schedule multiple tex or sfu
+ * instructions, the first user has to wait for all of them to complete.
+ */
+ if (n->has_ss_src)
+ delay = MAX2(delay, ctx->ss_delay);
+ if (n->has_sy_src)
+ delay = MAX2(delay, ctx->sy_delay);
- return false;
+ return delay;
}
/* find instruction to schedule: */
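The hunks above replace the old sfu_delay/tex_delay counters and would_sync() with an instruction pointer (ctx->ip), a per-node earliest_ip propagated along DAG edges, and (ss)/(sy) soft-delay counters consulted by node_delay_soft(); the choose_instr() changes that follow then prefer ready nodes whose soft delay is at most 3, breaking ties on max_delay. Below is a minimal standalone sketch of that bookkeeping, with hypothetical node/edge types standing in for ir3_postsched_node and dag_edge; it is an illustration of the model, not the driver code itself.

#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

struct node {
   unsigned earliest_ip;       /* earliest cycle this node may issue */
   unsigned nchildren;
   struct node *child[4];
   unsigned edge_delay[4];     /* delay carried on the edge to child[i] */
};

static unsigned ip;            /* current instruction pointer, like ctx->ip */

/* Remaining delay (conceptually, nops) if we issued n right now. */
static unsigned node_delay(const struct node *n)
{
   return MAX2(n->earliest_ip, ip) - ip;
}

/* Advance to earliest_ip, consume the instruction's own cycles, then
 * push the per-edge delays down to the children, mirroring the
 * earliest_ip update in schedule() above.
 */
static void schedule(struct node *n, unsigned cycles)
{
   ip = MAX2(ip, n->earliest_ip) + cycles;
   for (unsigned i = 0; i < n->nchildren; i++) {
      struct node *c = n->child[i];
      c->earliest_ip = MAX2(c->earliest_ip, ip + n->edge_delay[i]);
   }
}

int main(void)
{
   struct node consumer = { 0 };
   struct node producer = { .nchildren = 1,
                            .child = { &consumer },
                            .edge_delay = { 3 } };

   schedule(&producer, 1);      /* ip = 1, consumer.earliest_ip = 4 */
   printf("delay before consumer: %u\n", node_delay(&consumer));   /* 3 */
   schedule(&consumer, 1);
   printf("ip after consumer: %u\n", ip);                          /* 5 */
   return 0;
}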
@@ -215,8 +237,7 @@ choose_instr(struct ir3_postsched_ctx *ctx)
/* Next prioritize discards: */
foreach_sched_node (n, &ctx->dag->heads) {
- unsigned d =
- ir3_delay_calc_postra(ctx->block, n->instr, false, ctx->v->mergedregs);
+ unsigned d = node_delay(ctx, n);
if (d > 0)
continue;
@@ -235,13 +256,12 @@ choose_instr(struct ir3_postsched_ctx *ctx)
/* Next prioritize expensive instructions: */
foreach_sched_node (n, &ctx->dag->heads) {
- unsigned d =
- ir3_delay_calc_postra(ctx->block, n->instr, false, ctx->v->mergedregs);
+ unsigned d = node_delay_soft(ctx, n);
if (d > 0)
continue;
- if (!(is_sfu(n->instr) || is_tex(n->instr)))
+ if (!(is_ss_producer(n->instr) || is_sy_producer(n->instr)))
continue;
if (!chosen || (chosen->max_delay < n->max_delay))
@@ -249,53 +269,36 @@ choose_instr(struct ir3_postsched_ctx *ctx)
}
if (chosen) {
- di(chosen->instr, "csp: chose (sfu/tex, hard ready)");
+ di(chosen->instr, "csp: chose (sfu/tex, soft ready)");
return chosen->instr;
}
- /*
- * Sometimes be better to take a nop, rather than scheduling an
- * instruction that would require an (ss) shortly after another
- * SFU.. ie. if last SFU was just one or two instr ago, and we
- * could choose between taking a nop and then scheduling
- * something else, vs scheduling the immed avail instruction that
- * would require (ss), we are better with the nop.
- */
- for (unsigned delay = 0; delay < 4; delay++) {
- foreach_sched_node (n, &ctx->dag->heads) {
- if (would_sync(ctx, n->instr))
- continue;
-
- unsigned d = ir3_delay_calc_postra(ctx->block, n->instr, true,
- ctx->v->mergedregs);
-
- if (d > delay)
- continue;
-
- if (!chosen || (chosen->max_delay < n->max_delay))
- chosen = n;
- }
-
- if (chosen) {
- di(chosen->instr, "csp: chose (soft ready, delay=%u)", delay);
- return chosen->instr;
- }
- }
-
/* Next try to find a ready leader w/ soft delay (ie. including extra
* delay for things like tex fetch which can be synchronized w/ sync
* bit (but we probably do want to schedule some other instructions
- * while we wait)
+ * while we wait). We also allow a small number of nops, to prefer now-nops
+ * over future-nops up to a point, as that gives better results.
*/
+ unsigned chosen_delay = 0;
foreach_sched_node (n, &ctx->dag->heads) {
- unsigned d =
- ir3_delay_calc_postra(ctx->block, n->instr, true, ctx->v->mergedregs);
+ unsigned d = node_delay_soft(ctx, n);
- if (d > 0)
+ if (d > 3)
continue;
- if (!chosen || (chosen->max_delay < n->max_delay))
+ if (!chosen || d < chosen_delay) {
+ chosen = n;
+ chosen_delay = d;
+ continue;
+ }
+
+ if (d > chosen_delay)
+ continue;
+
+ if (chosen->max_delay < n->max_delay) {
chosen = n;
+ chosen_delay = d;
+ }
}
if (chosen) {
@@ -308,8 +311,7 @@ choose_instr(struct ir3_postsched_ctx *ctx)
* stalls.. but we've already decided there is not a better option.
*/
foreach_sched_node (n, &ctx->dag->heads) {
- unsigned d =
- ir3_delay_calc_postra(ctx->block, n->instr, false, ctx->v->mergedregs);
+ unsigned d = node_delay(ctx, n);
if (d > 0)
continue;
@@ -324,9 +326,6 @@ choose_instr(struct ir3_postsched_ctx *ctx)
}
/* Otherwise choose leader with maximum cost:
- *
- * TODO should we try to balance cost and delays? I guess it is
- * a balance between now-nop's and future-nop's?
*/
foreach_sched_node (n, &ctx->dag->heads) {
if (!chosen || chosen->max_delay < n->max_delay)
@@ -361,6 +360,7 @@ struct ir3_postsched_deps_state {
* for full precision and 2nd half for half-precision.
*/
struct ir3_postsched_node *regs[2 * 256];
+ unsigned dst_n[2 * 256];
};
/* bounds checking read/write accessors, since OoB access to stuff on
@@ -374,7 +374,8 @@ struct ir3_postsched_deps_state {
static void
add_dep(struct ir3_postsched_deps_state *state,
- struct ir3_postsched_node *before, struct ir3_postsched_node *after)
+ struct ir3_postsched_node *before, struct ir3_postsched_node *after,
+ unsigned d)
{
if (!before || !after)
return;
@@ -382,30 +383,36 @@ add_dep(struct ir3_postsched_deps_state *state,
assert(before != after);
if (state->direction == F) {
- dag_add_edge(&before->dag, &after->dag, NULL);
+ dag_add_edge_max_data(&before->dag, &after->dag, (uintptr_t)d);
} else {
- dag_add_edge(&after->dag, &before->dag, NULL);
+ dag_add_edge_max_data(&after->dag, &before->dag, 0);
}
}
static void
add_single_reg_dep(struct ir3_postsched_deps_state *state,
- struct ir3_postsched_node *node, unsigned num, int src_n)
+ struct ir3_postsched_node *node, unsigned num, int src_n,
+ int dst_n)
{
struct ir3_postsched_node *dep = dep_reg(state, num);
+ unsigned d = 0;
if (src_n >= 0 && dep && state->direction == F) {
- unsigned d = ir3_delayslots(dep->instr, node->instr, src_n, true);
- node->delay = MAX2(node->delay, d);
- if (is_tex_or_prefetch(dep->instr))
- node->has_tex_src = true;
- if (is_tex_or_prefetch(dep->instr))
- node->has_sfu_src = true;
- }
-
- add_dep(state, dep, node);
+ /* get the dst_n this corresponds to */
+ unsigned dst_n = state->dst_n[num];
+ unsigned d_soft = ir3_delayslots(dep->instr, node->instr, src_n, true);
+ d = ir3_delayslots_with_repeat(dep->instr, node->instr, dst_n, src_n);
+ node->delay = MAX2(node->delay, d_soft);
+ if (is_sy_producer(dep->instr))
+ node->has_sy_src = true;
+ if (is_ss_producer(dep->instr))
+ node->has_ss_src = true;
+ }
+
+ add_dep(state, dep, node, d);
if (src_n < 0) {
dep_reg(state, num) = node;
+ state->dst_n[num] = dst_n;
}
}
@@ -413,15 +420,15 @@ add_single_reg_dep(struct ir3_postsched_deps_state *state,
* between half and full precision that result in additional dependencies.
* The 'reg' arg is really just to know half vs full precision.
*
- * If non-negative, then this adds a dependency on a source register, and
+ * If src_n is non-negative, then this adds a dependency on a source register, and
* src_n is the index passed into ir3_delayslots() for calculating the delay:
- * If positive, corresponds to node->instr->regs[src_n]. If negative, then
- * this is for a destination register.
+ * it corresponds to node->instr->srcs[src_n]. If src_n is negative, then
+ * this is for the destination register corresponding to dst_n.
*/
static void
add_reg_dep(struct ir3_postsched_deps_state *state,
struct ir3_postsched_node *node, const struct ir3_register *reg,
- unsigned num, int src_n)
+ unsigned num, int src_n, int dst_n)
{
if (state->merged) {
/* Make sure that special registers like a0.x that are written as
@@ -430,16 +437,16 @@ add_reg_dep(struct ir3_postsched_deps_state *state,
*/
if ((reg->flags & IR3_REG_HALF) && !is_reg_special(reg)) {
/* single conflict in half-reg space: */
- add_single_reg_dep(state, node, num, src_n);
+ add_single_reg_dep(state, node, num, src_n, dst_n);
} else {
/* two conflicts in half-reg space: */
- add_single_reg_dep(state, node, 2 * num + 0, src_n);
- add_single_reg_dep(state, node, 2 * num + 1, src_n);
+ add_single_reg_dep(state, node, 2 * num + 0, src_n, dst_n);
+ add_single_reg_dep(state, node, 2 * num + 1, src_n, dst_n);
}
} else {
if (reg->flags & IR3_REG_HALF)
num += ARRAY_SIZE(state->regs) / 2;
- add_single_reg_dep(state, node, num, src_n);
+ add_single_reg_dep(state, node, num, src_n, dst_n);
}
}
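In the forward pass, state->regs[] (together with the new dst_n[] array) acts as a last-writer table per half-register slot: a read adds a RAW edge from the recorded writer carrying the delay-slot count as edge data, while a write first orders itself after the previous writer and then takes over the slot. A small hedged sketch of that pattern with a plain array and made-up node names follows; the real code also runs a reverse pass for write-after-read ordering and handles merged half/full registers.

#include <stdio.h>

#define NUM_REGS 256

struct node {
   const char *name;
};

static struct node *last_write[NUM_REGS];  /* plays the role of state->regs[] */

/* Stand-in for dag_add_edge_max_data(); the edge data is the delay-slot
 * count that node_delay()/schedule() consume later.
 */
static void add_edge(struct node *before, struct node *after, unsigned delay)
{
   printf("edge %s -> %s (delay %u)\n", before->name, after->name, delay);
}

static void read_reg(struct node *n, unsigned r, unsigned raw_delay)
{
   if (last_write[r])               /* RAW: consumer waits for the producer */
      add_edge(last_write[r], n, raw_delay);
}

static void write_reg(struct node *n, unsigned r)
{
   if (last_write[r])               /* WAW: order after the previous writer */
      add_edge(last_write[r], n, 0);
   last_write[r] = n;               /* n now owns the register slot */
}

int main(void)
{
   struct node a = { "mul" }, b = { "add" }, c = { "mov" };
   write_reg(&a, 4);                /* mul writes slot 4            */
   read_reg(&b, 4, 3);              /* add reads slot 4: RAW, d = 3 */
   write_reg(&b, 5);                /* add writes slot 5            */
   write_reg(&c, 4);                /* mov rewrites slot 4: WAW     */
   return 0;
}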
@@ -457,12 +464,12 @@ calculate_deps(struct ir3_postsched_deps_state *state,
if (reg->flags & IR3_REG_RELATIV) {
/* mark entire array as read: */
for (unsigned j = 0; j < reg->size; j++) {
- add_reg_dep(state, node, reg, reg->array.base + j, i);
+ add_reg_dep(state, node, reg, reg->array.base + j, i, -1);
}
} else {
assert(reg->wrmask >= 1);
u_foreach_bit (b, reg->wrmask) {
- add_reg_dep(state, node, reg, reg->num + b, i);
+ add_reg_dep(state, node, reg, reg->num + b, i, -1);
}
}
}
@@ -470,18 +477,18 @@ calculate_deps(struct ir3_postsched_deps_state *state,
/* And then after we update the state for what this instruction
* wrote:
*/
- foreach_dst (reg, node->instr) {
+ foreach_dst_n (reg, i, node->instr) {
if (reg->wrmask == 0)
continue;
if (reg->flags & IR3_REG_RELATIV) {
/* mark the entire array as written: */
- for (unsigned i = 0; i < reg->size; i++) {
- add_reg_dep(state, node, reg, reg->array.base + i, -1);
+ for (unsigned j = 0; j < reg->size; j++) {
+ add_reg_dep(state, node, reg, reg->array.base + j, -1, i);
}
} else {
assert(reg->wrmask >= 1);
u_foreach_bit (b, reg->wrmask) {
- add_reg_dep(state, node, reg, reg->num + b, -1);
+ add_reg_dep(state, node, reg, reg->num + b, -1, i);
}
}
}
@@ -593,7 +600,7 @@ sched_dag_init(struct ir3_postsched_ctx *ctx)
if (src->block != instr->block)
continue;
- dag_add_edge(&sn->dag, &n->dag, NULL);
+ dag_add_edge_max_data(&sn->dag, &n->dag, 0);
}
if (is_input(instr)) {
@@ -602,14 +609,14 @@ sched_dag_init(struct ir3_postsched_ctx *ctx)
util_dynarray_foreach (&inputs, struct ir3_instruction *, instrp) {
struct ir3_instruction *input = *instrp;
struct ir3_postsched_node *in = input->data;
- dag_add_edge(&in->dag, &n->dag, NULL);
+ dag_add_edge_max_data(&in->dag, &n->dag, 0);
}
util_dynarray_append(&kills, struct ir3_instruction *, instr);
} else if (is_tex(instr) || is_mem(instr)) {
util_dynarray_foreach (&kills, struct ir3_instruction *, instrp) {
struct ir3_instruction *kill = *instrp;
struct ir3_postsched_node *kn = kill->data;
- dag_add_edge(&kn->dag, &n->dag, NULL);
+ dag_add_edge_max_data(&kn->dag, &n->dag, 0);
}
}
}
@@ -630,8 +637,8 @@ static void
sched_block(struct ir3_postsched_ctx *ctx, struct ir3_block *block)
{
ctx->block = block;
- ctx->tex_delay = 0;
- ctx->sfu_delay = 0;
+ ctx->sy_delay = 0;
+ ctx->ss_delay = 0;
/* move all instructions to the unscheduled list, and
* empty the block's instruction list (to which we will
@@ -677,18 +684,10 @@ sched_block(struct ir3_postsched_ctx *ctx, struct ir3_block *block)
while (!list_is_empty(&ctx->unscheduled_list)) {
struct ir3_instruction *instr = choose_instr(ctx);
- unsigned delay =
- ir3_delay_calc_postra(ctx->block, instr, false, ctx->v->mergedregs);
+ unsigned delay = node_delay(ctx, instr->data);
d("delay=%u", delay);
- /* and if we run out of instructions that can be scheduled,
- * then it is time for nop's:
- */
debug_assert(delay <= 6);
- while (delay > 0) {
- ir3_NOP(block);
- delay--;
- }
schedule(ctx, instr);
}
@@ -750,7 +749,6 @@ ir3_postsched(struct ir3 *ir, struct ir3_shader_variant *v)
.v = v,
};
- ir3_remove_nops(ir);
cleanup_self_movs(ir);
foreach_block (block, &ir->block_list) {
diff --git a/lib/mesa/src/freedreno/ir3/ir3_ra.h b/lib/mesa/src/freedreno/ir3/ir3_ra.h
index 259341eaa..c6837aaae 100644
--- a/lib/mesa/src/freedreno/ir3/ir3_ra.h
+++ b/lib/mesa/src/freedreno/ir3/ir3_ra.h
@@ -124,7 +124,7 @@ ra_reg_is_dst(const struct ir3_register *reg)
if (ra_reg_is_src((__srcreg = (__instr)->srcs[__i])))
#define ra_foreach_dst_n(__dstreg, __n, __instr) \
- foreach_dst_n(__dstreg, __n, instr) \
+ foreach_dst_n(__dstreg, __n, __instr) \
if (ra_reg_is_dst(__dstreg))
#define ra_foreach_dst(__dstreg, __instr) \
diff --git a/lib/mesa/src/freedreno/ir3/tests/disasm.c b/lib/mesa/src/freedreno/ir3/tests/disasm.c
index 542469aa1..2f1b89f0d 100644
--- a/lib/mesa/src/freedreno/ir3/tests/disasm.c
+++ b/lib/mesa/src/freedreno/ir3/tests/disasm.c
@@ -43,6 +43,8 @@
#include "isa/isa.h"
/* clang-format off */
+/* Note: @anholt's 4xx disasm was done on an a418 Nexus 5x */
+#define INSTR_4XX(i, d, ...) { .gpu_id = 420, .instr = #i, .expected = d, __VA_ARGS__ }
#define INSTR_5XX(i, d, ...) { .gpu_id = 540, .instr = #i, .expected = d, __VA_ARGS__ }
#define INSTR_6XX(i, d, ...) { .gpu_id = 630, .instr = #i, .expected = d, __VA_ARGS__ }
/* clang-format on */
@@ -58,153 +60,185 @@ static const struct test {
bool parse_fail;
} tests[] = {
/* clang-format off */
- /* cat0 */
- INSTR_6XX(00000000_00000000, "nop"),
- INSTR_6XX(00000200_00000000, "(rpt2)nop"),
- INSTR_6XX(03000000_00000000, "end"),
- INSTR_6XX(00800000_00000004, "br p0.x, #4"),
- INSTR_6XX(00900000_00000003, "br !p0.x, #3"),
- INSTR_6XX(03820000_00000015, "shps #21"), /* emit */
- INSTR_6XX(04021000_00000000, "(ss)shpe"), /* cut */
- INSTR_6XX(02820000_00000014, "getone #20"), /* kill p0.x */
- INSTR_6XX(00906020_00000007, "brao !p0.x, !p0.y, #7"),
- INSTR_6XX(00804040_00000003, "braa p0.x, p0.y, #3"),
- INSTR_6XX(07820000_00000000, "prede"),
- INSTR_6XX(00800063_0000001e, "brac.3 #30"),
- INSTR_6XX(06820000_00000000, "predt p0.x"),
- INSTR_6XX(07020000_00000000, "predf p0.x"),
- INSTR_6XX(07820000_00000000, "prede"),
-
- /* cat1 */
- INSTR_6XX(20244000_00000020, "mov.f32f32 r0.x, c8.x"),
- INSTR_6XX(20200000_00000020, "mov.f16f16 hr0.x, hc8.x"),
- INSTR_6XX(20150000_00000000, "cov.s32s16 hr0.x, r0.x"),
- INSTR_6XX(20156004_00000c11, "(ul)mov.s32s32 r1.x, c<a0.x + 17>"),
- INSTR_6XX(201100f4_00000000, "mova a0.x, hr0.x"),
- INSTR_6XX(20244905_00000410, "(rpt1)mov.f32f32 r1.y, (r)c260.x"),
- INSTR_6XX(20174004_00000008, "mov.s32s32 r<a0.x + 4>, r2.x"),
- INSTR_6XX(20130000_00000005, "mov.s16s16 hr<a0.x>, hr1.y"),
- INSTR_6XX(20110004_00000800, "mov.s16s16 hr1.x, hr<a0.x>"),
- /* dEQP-VK.subgroups.ballot.compute.compute */
- INSTR_6XX(260cc3c0_00000000, "movmsk.w128 r48.x"), /* movmsk.w128 sr48.x */
-
- INSTR_6XX(240cc004_00030201, "swz.u32u32 r1.x, r0.w, r0.y, r0.z"),
- INSTR_6XX(2400c105_04030201, "gat.f16u32 r1.y, hr0.y, hr0.z, hr0.w, hr1.x"),
- INSTR_6XX(240c0205_04030201, "sct.u32f16 hr1.y, hr0.z, hr0.w, hr1.x, r0.y"),
- INSTR_6XX(2400c205_04030201, "sct.f16u32 r1.y, r0.z, r0.w, r1.x, hr0.y"),
-
- INSTR_6XX(20510005_0000ffff, "mov.s16s16 hr1.y, -1"),
- INSTR_6XX(20400005_00003900, "mov.f16f16 hr1.y, h(0.625000)"),
- INSTR_6XX(20400006_00003800, "mov.f16f16 hr1.z, h(0.500000)"),
- INSTR_6XX(204880f5_00000000, "mova1 a1.x, 0"),
-
- /* cat2 */
- INSTR_6XX(40104002_0c210001, "add.f hr0.z, r0.y, c<a0.x + 33>"),
- INSTR_6XX(40b80804_10408004, "(nop3) cmps.f.lt r1.x, (abs)r1.x, c16.x"),
- INSTR_6XX(47308a02_00002000, "(rpt2)bary.f (ei)r0.z, (r)0, r0.x"),
- INSTR_6XX(43480801_00008001, "(nop3) absneg.s hr0.y, (abs)hr0.y"),
- INSTR_6XX(50600004_2c010004, "(sy)mul.f hr1.x, hr1.x, h(0.5)"),
- INSTR_6XX(42280807_27ff0000, "(nop3) add.s hr1.w, hr0.x, h(-1)"),
- INSTR_6XX(40a500f8_2c000004, "cmps.f.ne p0.x, hr1.x, h(0.0)"),
- INSTR_6XX(438000f8_20010009, "and.b p0.x, hr2.y, h(1)"),
- INSTR_6XX(438000f9_00020001, "and.b p0.y, hr0.y, hr0.z"),
- INSTR_6XX(40080902_50200006, "(rpt1)add.f hr0.z, (r)hr1.z, (neg)(r)hc8.x"),
- INSTR_6XX(42380c01_00040001, "(sat)(nop3) add.s r0.y, r0.y, r1.x"),
- INSTR_6XX(42480000_48801086, "(nop2) sub.u hr0.x, hc33.z, (neg)hr<a0.x + 128>"),
- INSTR_6XX(46b00001_00001020, "clz.b r0.y, c8.x"),
- INSTR_6XX(46700009_00000009, "bfrev.b r2.y, r2.y"),
-
- /* cat3 */
- INSTR_6XX(66000000_10421041, "sel.f16 hr0.x, hc16.y, hr0.x, hc16.z"),
- INSTR_6XX(64848109_109a9099, "(rpt1)sel.b32 r2.y, c38.y, (r)r2.y, c38.z"),
- INSTR_6XX(64810904_30521036, "(rpt1)sel.b32 r1.x, (r)c13.z, r0.z, (r)c20.z"),
- INSTR_6XX(64818902_20041032, "(rpt1)sel.b32 r0.z, (r)c12.z, r0.w, (r)r1.x"),
- INSTR_6XX(63820005_10315030, "mad.f32 r1.y, (neg)c12.x, r1.x, c12.y"),
- INSTR_6XX(62050009_00091000, "mad.u24 r2.y, c0.x, r2.z, r2.y"),
- INSTR_6XX(61828008_00081033, "madsh.m16 r2.x, c12.w, r1.y, r2.x"),
- INSTR_6XX(65900820_100cb008, "(nop3) shlg.b16 hr8.x, 8, hr8.x, 12"), /* (nop3) shlg.b16 hr8.x, (r)8, (r)hr8.x, 12; */
- INSTR_6XX(65ae085c_0002a001, "(nop3) shlg.b16 hr23.x, hr0.y, hr23.x, hr0.z"), /* not seen in blob */
- INSTR_6XX(65900820_0c0aac05, "(nop3) shlg.b16 hr8.x, hc<a0.x + 5>, hr8.x, hc<a0.x + 10>"), /* not seen in blob */
-
- /* cat4 */
- INSTR_6XX(8010000a_00000003, "rcp r2.z, r0.w"),
-
- /* cat5 */
- /* dEQP-VK.glsl.derivate.dfdx.uniform_if.float_mediump */
- INSTR_6XX(a3801102_00000001, "dsx (f32)(x)r0.z, r0.x"), /* dsx (f32)(xOOO)r0.z, r0.x */
- /* dEQP-VK.glsl.derivate.dfdy.uniform_if.float_mediump */
- INSTR_6XX(a3c01102_00000001, "dsy (f32)(x)r0.z, r0.x"), /* dsy (f32)(xOOO)r0.z, r0.x */
- /* dEQP-VK.glsl.derivate.dfdxfine.uniform_loop.float_highp */
- INSTR_6XX(a6001105_00000001, "dsxpp.1 (x)r1.y, r0.x"), /* dsxpp.1 (xOOO)r1.y, r0.x */
- INSTR_6XX(a6201105_00000001, "dsxpp.1.p (x)r1.y, r0.x"), /* dsxpp.1 (xOOO)r1.y, r0.x */
-
- INSTR_6XX(a2802f00_00000001, "getsize (u16)(xyzw)hr0.x, r0.x, t#0"),
- INSTR_6XX(a0c89f04_c4600005, "sam.base1 (f32)(xyzw)r1.x, r0.z, s#3, t#2"), /* sam.s2en.mode6.base1 (f32)(xyzw)r1.x, r0.z, 35 */
- INSTR_6XX(a1c85f00_c0200005, "getlod.base0 (s32)(xyzw)r0.x, r0.z, s#1, t#0"), /* getlod.s2en.mode6.base0 (s32)(xyzw)r0.x, r0.z, 1 */
- INSTR_6XX(a1000f00_00000004, "samb (f16)(xyzw)hr0.x, hr0.z, hr0.x, s#0, t#0"),
- INSTR_6XX(a1000f00_00000003, "samb (f16)(xyzw)hr0.x, r0.y, r0.x, s#0, t#0"),
- INSTR_6XX(a0c00f00_04400002, "sam (f16)(xyzw)hr0.x, hr0.y, s#2, t#2"),
- INSTR_6XX(a6c02f00_00000000, "rgetinfo (u16)(xyzw)hr0.x"),
- INSTR_6XX(a3482f08_c0000000, "getinfo.base0 (u16)(xyzw)hr2.x, t#0"),
- /* dEQP-GLES31.functional.texture.texture_buffer.render.as_fragment_texture.buffer_size_65536 */
- INSTR_5XX(a2c03102_00000000, "getbuf (u32)(x)r0.z, t#0"),
- INSTR_6XX(a0c81f00_e0200005, "sam.base0 (f32)(xyzw)r0.x, r0.z, s#1, a1.x"),
-
-
- /* cat6 */
-
- INSTR_5XX(c6e60000_00010600, "ldgb.untyped.4d.u32.1 r0.x, g[0], r1.x, r0.x"), /* ldgb.a.untyped.1dtype.u32.1 r0.x, g[r1.x], r0.x, 0 */
- INSTR_5XX(d7660204_02000a01, "(sy)stib.typed.2d.u32.1 g[1], r0.x, r0.z, r1.x"), /* (sy)stib.a.u32.2d.1 g[r1.x], r0.x, r0.z, 1. r1.x is offset in ibo, r0.x is value*/
- /* dEQP-VK.image.load_store.1d_array.r8g8b8a8_unorm */
- INSTR_5XX(c1a20006_0600ba01, "ldib.typed.2d.f32.4 r1.z, g[0], r0.z, r1.z"), /* ldib.a.f32.2d.4 r1.z, g[r0.z], r1.z, 0. r0.z is offset in ibo as src. r1.z */
- /* dEQP-VK.image.load_store.3d.r32g32b32a32_sint */
- INSTR_5XX(c1aa0003_0500fc01, "ldib.typed.3d.s32.4 r0.w, g[0], r0.w, r1.y"), /* ldib.a.s32.3d.4 r0.w, g[r0.w], r1.y, 0. r0.w is offset in ibo as src, and dst */
- /* dEQP-VK.binding_model.shader_access.primary_cmd_buf.storage_image.vertex.descriptor_array.3d */
- INSTR_5XX(c1a20204_0401fc01, "ldib.typed.3d.f32.4 r1.x, g[1], r1.w, r1.x"), /* ldib.a.f32.3d.4 r1.x, g[r1.w], r1.x, 1 */
- /* dEQP-VK.binding_model.shader_access.secondary_cmd_buf.with_push.storage_texel_buffer.vertex_fragment.single_descriptor.offset_zero */
- INSTR_5XX(c1a20005_0501be01, "ldib.typed.4d.f32.4 r1.y, g[0], r1.z, r1.y"), /* ldib.a.f32.1dtype.4 r1.y, g[r1.z], r1.y, 0 */
- /* dEQP-VK.texture.filtering.cube.formats.r8g8b8a8_snorm_nearest */
- INSTR_5XX(c1a60200_0000ba01, "ldib.typed.2d.u32.4 r0.x, g[1], r0.z, r0.x"), /* ldib.a.u32.2d.4 r0.x, g[r0.z], r0.x, 1 */
-
- // TODO is this a real instruction? Or float -6.0 ?
- // INSTR_6XX(c0c00000_00000000, "stg.f16 g[hr0.x], hr0.x, hr0.x", .parse_fail=true),
- /* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */
- INSTR_6XX(c0d20906_02800004, "stg.a.f32 g[r1.x+(r1.z)<<2], r0.z, 2"), /* stg.a.f32 g[r1.x+(r1.z<<2)], r0.z, 2 */
- INSTR_6XX(c0da052e_01800042, "stg.a.s32 g[r0.z+(r11.z)<<2], r8.y, 1"), /* stg.a.s32 g[r0.z+(r11.z<<2)], r8.y, 1 */
- INSTR_6XX(c0ca0505_03800042, "stg.s32 g[r0.z+5], r8.y, 3"),
- INSTR_6XX(c0ca0500_03800042, "stg.s32 g[r0.z], r8.y, 3"),
- INSTR_6XX(c0ca0531_03800242, "stg.s32 g[r0.z+305], r8.y, 3"),
-
- /* Customely crafted */
- INSTR_6XX(c0d61104_01800228, "stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1"),
- INSTR_6XX(c0d61104_01802628, "stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1"),
-
- INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
- INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
- INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"),
- INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"),
- INSTR_6XX(c0060003_0180c269, "ldg.u32 r0.w, g[r0.w+308], 1"),
-
- /* Found in TCS/TES shaders of GTA V */
- INSTR_6XX(c0020007_03c1420f, "ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3"), /* ldg.a.f32 r1.w, g[r1.y+((r1.w+1)<<2)], 3 */
-
- /* Customely crafted */
- INSTR_6XX(c0020007_03c1740f, "ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3"),
-
- INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
- INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
- INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"),
- INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"),
-
- /* dEQP-GLES3.functional.ubo.random.basic_arrays.0 */
- INSTR_6XX(c7020020_01800000, "stc c[32], r0.x, 1", .parse_fail=true),
- /* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */
- INSTR_6XX(c7060020_03800000, "stc c[32], r0.x, 3", .parse_fail=true),
-
- /* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */
- INSTR_6XX(c0260200_03676100, "stib.b.untyped.1d.u32.3.imm.base0 r0.x, r0.w, 1"), /* stib.untyped.u32.1d.3.mode4.base0 r0.x, r0.w, 1 */
-
- INSTR_6XX(c0240402_00674100, "stib.b.untyped.1d.u16.1.imm.base0 r0.z, r0.x, 2"),
+ /* cat0 */
+ INSTR_6XX(00000000_00000000, "nop"),
+ INSTR_6XX(00000200_00000000, "(rpt2)nop"),
+ INSTR_6XX(03000000_00000000, "end"),
+ INSTR_6XX(00800000_00000004, "br p0.x, #4"),
+ INSTR_6XX(00800000_fffffffc, "br p0.x, #-4"),
+ INSTR_6XX(00900000_00000003, "br !p0.x, #3"),
+ INSTR_6XX(03820000_00000015, "shps #21"), /* emit */
+ INSTR_6XX(04021000_00000000, "(ss)shpe"), /* cut */
+ INSTR_6XX(02220000_00000004, "getlast.w8 #4"),
+ INSTR_6XX(02820000_00000014, "getone #20"), /* kill p0.x */
+ INSTR_6XX(00906020_00000007, "brao !p0.x, !p0.y, #7"),
+ INSTR_6XX(00804040_00000003, "braa p0.x, p0.y, #3"),
+ INSTR_6XX(07820000_00000000, "prede"),
+ INSTR_6XX(00800063_0000001e, "brac.3 #30"),
+ INSTR_6XX(06820000_00000000, "predt p0.x"),
+ INSTR_6XX(07020000_00000000, "predf p0.x"),
+ INSTR_6XX(07820000_00000000, "prede"),
+
+ /* cat1 */
+ INSTR_6XX(20244000_00000020, "mov.f32f32 r0.x, c8.x"),
+ INSTR_6XX(20200000_00000020, "mov.f16f16 hr0.x, hc8.x"),
+ INSTR_6XX(20150000_00000000, "cov.s32s16 hr0.x, r0.x"),
+ INSTR_6XX(20156004_00000c11, "(ul)mov.s32s32 r1.x, c<a0.x + 17>"),
+ INSTR_6XX(201100f4_00000000, "mova a0.x, hr0.x"),
+ INSTR_6XX(20244905_00000410, "(rpt1)mov.f32f32 r1.y, (r)c260.x"),
+ INSTR_6XX(20174004_00000008, "mov.s32s32 r<a0.x + 4>, r2.x"),
+ INSTR_6XX(20130000_00000005, "mov.s16s16 hr<a0.x>, hr1.y"),
+ INSTR_6XX(20110004_00000800, "mov.s16s16 hr1.x, hr<a0.x>"),
+ /* dEQP-VK.subgroups.ballot.compute.compute */
+ INSTR_6XX(260cc3c0_00000000, "movmsk.w128 r48.x"), /* movmsk.w128 sr48.x */
+
+ INSTR_6XX(240cc004_00030201, "swz.u32u32 r1.x, r0.w, r0.y, r0.z"),
+ INSTR_6XX(2400c105_04030201, "gat.f16u32 r1.y, hr0.y, hr0.z, hr0.w, hr1.x"),
+ INSTR_6XX(240c0205_04030201, "sct.u32f16 hr1.y, hr0.z, hr0.w, hr1.x, r0.y"),
+ INSTR_6XX(2400c205_04030201, "sct.f16u32 r1.y, r0.z, r0.w, r1.x, hr0.y"),
+
+ INSTR_6XX(20510005_0000ffff, "mov.s16s16 hr1.y, -1"),
+ INSTR_6XX(20400005_00003900, "mov.f16f16 hr1.y, h(0.625000)"),
+ INSTR_6XX(20400006_00003800, "mov.f16f16 hr1.z, h(0.500000)"),
+ INSTR_6XX(204880f5_00000000, "mova1 a1.x, 0"),
+
+ /* cat2 */
+ INSTR_6XX(40104002_0c210001, "add.f hr0.z, r0.y, c<a0.x + 33>"),
+ INSTR_6XX(40b80804_10408004, "(nop3) cmps.f.lt r1.x, (abs)r1.x, c16.x"),
+ INSTR_6XX(47308a02_00002000, "(rpt2)bary.f (ei)r0.z, (r)0, r0.x"),
+ INSTR_6XX(47348000_00002000, "flat.b (ei)r0.x, 0, r0.x"),
+ INSTR_6XX(43480801_00008001, "(nop3) absneg.s hr0.y, (abs)hr0.y"),
+ INSTR_6XX(50600004_2c010004, "(sy)mul.f hr1.x, hr1.x, h(0.5)"),
+ INSTR_6XX(42280807_27ff0000, "(nop3) add.s hr1.w, hr0.x, h(-1)"),
+ INSTR_6XX(40a500f8_2c000004, "cmps.f.ne p0.x, hr1.x, h(0.0)"),
+ INSTR_6XX(438000f8_20010009, "and.b p0.x, hr2.y, h(1)"),
+ INSTR_6XX(438000f9_00020001, "and.b p0.y, hr0.y, hr0.z"),
+ INSTR_6XX(40080902_50200006, "(rpt1)add.f hr0.z, (r)hr1.z, (neg)(r)hc8.x"),
+ INSTR_6XX(42380c01_00040001, "(sat)(nop3) add.s r0.y, r0.y, r1.x"),
+ INSTR_6XX(42480000_48801086, "(nop2) sub.u hr0.x, hc33.z, (neg)hr<a0.x + 128>"),
+ INSTR_6XX(46b00001_00001020, "clz.b r0.y, c8.x"),
+ INSTR_6XX(46700009_00000009, "bfrev.b r2.y, r2.y"),
+
+ /* cat3 */
+ INSTR_6XX(66000000_10421041, "sel.f16 hr0.x, hc16.y, hr0.x, hc16.z"),
+ INSTR_6XX(64848109_109a9099, "(rpt1)sel.b32 r2.y, c38.y, (r)r2.y, c38.z"),
+ INSTR_6XX(64810904_30521036, "(rpt1)sel.b32 r1.x, (r)c13.z, r0.z, (r)c20.z"),
+ INSTR_6XX(64818902_20041032, "(rpt1)sel.b32 r0.z, (r)c12.z, r0.w, (r)r1.x"),
+ INSTR_6XX(63820005_10315030, "mad.f32 r1.y, (neg)c12.x, r1.x, c12.y"),
+ INSTR_6XX(62050009_00091000, "mad.u24 r2.y, c0.x, r2.z, r2.y"),
+ INSTR_6XX(61828008_00081033, "madsh.m16 r2.x, c12.w, r1.y, r2.x"),
+ INSTR_6XX(65900820_100cb008, "(nop3) shlg hr8.x, 8, hr8.x, 12"), /* (nop3) shlg.b16 hr8.x, (r)8, (r)hr8.x, 12; */
+ INSTR_6XX(65ae085c_0002a001, "(nop3) shlg hr23.x, hr0.y, hr23.x, hr0.z"), /* not seen in blob */
+ INSTR_6XX(65900820_0c0aac05, "(nop3) shlg hr8.x, hc<a0.x + 5>, hr8.x, hc<a0.x + 10>"), /* not seen in blob */
+ INSTR_6XX(65ae0c5c_0002a001, "(nop3) shlg r23.x, r0.y, r23.x, r0.z"), /* (nop3) shlg.b32 r23.x, (r)r0.y, (r)r23.x, r0.z */
+ INSTR_6XX(64018802_0002e003, "(nop3) shrm hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
+ INSTR_6XX(64818802_0002e003, "(nop3) shlm hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
+ INSTR_6XX(65018802_0002e003, "(nop3) shrg hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
+ INSTR_6XX(66018802_0002e003, "(nop3) andg hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
+ INSTR_6XX(67018802_1002e003, "(nop3) wmm hr0.z, (neg)hr0.w, hr0.w, 2"), /* (nop3) wmm.f16f16 hr0.z, (abs)(r)hr0.w, (r)hr0.w, 2 */
+ INSTR_6XX(67018c02_1002e003, "(nop3) wmm.accu hr0.z, (neg)hr0.w, hr0.w, 2"),
+ INSTR_6XX(6701c802_9002a003, "(nop3) wmm r0.z, r0.w, r0.w, 2"), /* (nop3) wmm.f32f32 r0.z, (r)r0.w, (r)r0.w, 2 */
+ /* custom test with qcom_dot8 function from cl_qcom_dot_product8 */
+ INSTR_6XX(66818c02_0002e003, "(sat)(nop3) dp2acc.mixed.low r0.z, r0.w, r0.w, r0.z"), /* (nop3) dp2acc (sat)r0.z, (signed)(low)(r)r0.w, (low)(r)r0.w, r0.z */
+ INSTR_6XX(6681c802_8002a003, "(nop3) dp4acc.unsigned.low r0.z, r0.w, r0.w, (neg)r0.z"), /* (nop3) dp4acc r0.z, (unsigned)(r)r0.w, (r)r0.w, (neg)r0.z */
+
+ /* cat4 */
+ INSTR_6XX(8010000a_00000003, "rcp r2.z, r0.w"),
+
+ /* cat5 */
+ /* dEQP-VK.glsl.derivate.dfdx.uniform_if.float_mediump */
+ INSTR_6XX(a3801102_00000001, "dsx (f32)(x)r0.z, r0.x"), /* dsx (f32)(xOOO)r0.z, r0.x */
+ /* dEQP-VK.glsl.derivate.dfdy.uniform_if.float_mediump */
+ INSTR_6XX(a3c01102_00000001, "dsy (f32)(x)r0.z, r0.x"), /* dsy (f32)(xOOO)r0.z, r0.x */
+ /* dEQP-VK.glsl.derivate.dfdxfine.uniform_loop.float_highp */
+ INSTR_6XX(a6001105_00000001, "dsxpp.1 (x)r1.y, r0.x"), /* dsxpp.1 (xOOO)r1.y, r0.x */
+ INSTR_6XX(a6201105_00000001, "dsxpp.1.p (x)r1.y, r0.x"), /* dsxpp.1 (xOOO)r1.y, r0.x */
+
+ INSTR_6XX(a2802f00_00000001, "getsize (u16)(xyzw)hr0.x, r0.x, t#0"),
+ INSTR_6XX(a0c89f04_c4600005, "sam.base1 (f32)(xyzw)r1.x, r0.z, s#3, t#2"), /* sam.s2en.mode6.base1 (f32)(xyzw)r1.x, r0.z, 35 */
+ INSTR_6XX(a1c85f00_c0200005, "getlod.base0 (s32)(xyzw)r0.x, r0.z, s#1, t#0"), /* getlod.s2en.mode6.base0 (s32)(xyzw)r0.x, r0.z, 1 */
+ INSTR_6XX(a1000f00_00000004, "samb (f16)(xyzw)hr0.x, hr0.z, hr0.x, s#0, t#0"),
+ INSTR_6XX(a1000f00_00000003, "samb (f16)(xyzw)hr0.x, r0.y, r0.x, s#0, t#0"),
+ INSTR_6XX(a0c00f00_04400002, "sam (f16)(xyzw)hr0.x, hr0.y, s#2, t#2"),
+ INSTR_6XX(a6c02f00_00000000, "rgetinfo (u16)(xyzw)hr0.x"),
+ INSTR_6XX(a3482f08_c0000000, "getinfo.base0 (u16)(xyzw)hr2.x, t#0"),
+ /* dEQP-GLES31.functional.texture.texture_buffer.render.as_fragment_texture.buffer_size_65536 */
+ INSTR_5XX(a2c03102_00000000, "getbuf (u32)(x)r0.z, t#0"),
+ INSTR_6XX(a0c81f00_e0200005, "sam.base0 (f32)(xyzw)r0.x, r0.z, s#1, a1.x"),
+ INSTR_6XX(a0c81108_e2000001, "sam.base0 (f32)(x)r2.x, r0.x, s#16, a1.x"),
+ INSTR_6XX(a048d107_cc080a07, "isaml.base3 (s32)(x)r1.w, r0.w, r1.y, s#0, t#6"),
+
+
+ /* dEQP-VK.subgroups.arithmetic.compute.subgroupadd_float */
+ INSTR_6XX(a7c03102_00100003, "brcst.active.w8 (u32)(x)r0.z, r0.y"), /* brcst.active.w8 (u32)(xOOO)r0.z, r0.y */
+ /* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_int */
+ INSTR_6XX(b7e03107_00000401, "(sy)quad_shuffle.brcst (u32)(x)r1.w, r0.x, r0.z"), /* (sy)quad_shuffle.brcst (u32)(xOOO)r1.w, r0.x, r0.z */
+ /* dEQP-VK.subgroups.quad.graphics.subgroupquadswapdiagonal_int */
+ INSTR_6XX(b7e03104_00180001, "(sy)quad_shuffle.diag (u32)(x)r1.x, r0.x"), /* (sy)quad_shuffle.diag (u32)(xOOO)r1.x, r0.x */
+
+ /* cat6 */
+
+ INSTR_5XX(c6e60000_00010600, "ldgb.untyped.4d.u32.1 r0.x, g[0], r1.x, r0.x"), /* ldgb.a.untyped.1dtype.u32.1 r0.x, g[r1.x], r0.x, 0 */
+ INSTR_5XX(d7660204_02000a01, "(sy)stib.typed.2d.u32.1 g[1], r0.x, r0.z, r1.x"), /* (sy)stib.a.u32.2d.1 g[r1.x], r0.x, r0.z, 1. r1.x is offset in ibo, r0.x is value*/
+ /* dEQP-VK.image.load_store.1d_array.r8g8b8a8_unorm */
+ INSTR_5XX(c1a20006_0600ba01, "ldib.typed.2d.f32.4 r1.z, g[0], r0.z, r1.z"), /* ldib.a.f32.2d.4 r1.z, g[r0.z], r1.z, 0. r0.z is offset in ibo as src. r1.z */
+ /* dEQP-VK.image.load_store.3d.r32g32b32a32_sint */
+ INSTR_5XX(c1aa0003_0500fc01, "ldib.typed.3d.s32.4 r0.w, g[0], r0.w, r1.y"), /* ldib.a.s32.3d.4 r0.w, g[r0.w], r1.y, 0. r0.w is offset in ibo as src, and dst */
+ /* dEQP-VK.binding_model.shader_access.primary_cmd_buf.storage_image.vertex.descriptor_array.3d */
+ INSTR_5XX(c1a20204_0401fc01, "ldib.typed.3d.f32.4 r1.x, g[1], r1.w, r1.x"), /* ldib.a.f32.3d.4 r1.x, g[r1.w], r1.x, 1 */
+ /* dEQP-VK.binding_model.shader_access.secondary_cmd_buf.with_push.storage_texel_buffer.vertex_fragment.single_descriptor.offset_zero */
+ INSTR_5XX(c1a20005_0501be01, "ldib.typed.4d.f32.4 r1.y, g[0], r1.z, r1.y"), /* ldib.a.f32.1dtype.4 r1.y, g[r1.z], r1.y, 0 */
+ /* dEQP-VK.texture.filtering.cube.formats.r8g8b8a8_snorm_nearest */
+ INSTR_5XX(c1a60200_0000ba01, "ldib.typed.2d.u32.4 r0.x, g[1], r0.z, r0.x"), /* ldib.a.u32.2d.4 r0.x, g[r0.z], r0.x, 1 */
+
+ // TODO is this a real instruction? Or float -6.0 ?
+ // INSTR_6XX(c0c00000_00000000, "stg.f16 g[hr0.x], hr0.x, hr0.x", .parse_fail=true),
+ /* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */
+ INSTR_6XX(c0d20906_02800004, "stg.a.f32 g[r1.x+(r1.z)<<2], r0.z, 2"), /* stg.a.f32 g[r1.x+(r1.z<<2)], r0.z, 2 */
+ INSTR_6XX(c0da052e_01800042, "stg.a.s32 g[r0.z+(r11.z)<<2], r8.y, 1"), /* stg.a.s32 g[r0.z+(r11.z<<2)], r8.y, 1 */
+ INSTR_6XX(c0dc052e_01800042, "stg.a.u8 g[r0.z+(r11.z)<<2], hr8.y, 1"),
+ INSTR_6XX(c0ca0505_03800042, "stg.s32 g[r0.z+5], r8.y, 3"),
+ INSTR_6XX(c0ca0500_03800042, "stg.s32 g[r0.z], r8.y, 3"),
+ INSTR_6XX(c0ca0531_03800242, "stg.s32 g[r0.z+305], r8.y, 3"),
+ INSTR_5XX(c0ce0100_02800000, "stg.s8 g[r0.x], hr0.x, 2"),
+ INSTR_5XX(c0c00100_02800000, "stg.f16 g[r0.x], hr0.x, 2"),
+
+ /* Custom-crafted */
+ INSTR_6XX(c0d61104_01800228, "stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1"),
+ INSTR_6XX(c0d61104_01802628, "stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1"),
+
+ INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
+ INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
+ INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"),
+ INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"),
+ INSTR_6XX(c0060003_0180c269, "ldg.u32 r0.w, g[r0.w+308], 1"),
+ INSTR_6XX(c0040003_0180c269, "ldg.u16 hr0.w, g[r0.w+308], 1"),
+
+ /* Found in TCS/TES shaders of GTA V */
+ INSTR_6XX(c0020007_03c1420f, "ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3"), /* ldg.a.f32 r1.w, g[r1.y+((r1.w+1)<<2)], 3 */
+
+ /* Custom-crafted */
+ INSTR_6XX(c0020007_03c1740f, "ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3"),
+
+ INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
+ INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
+ INSTR_6XX(c0000006_01c18017, "ldg.a.f16 hr1.z, g[r1.z+(r2.w)<<2], 1"),
+ INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"),
+ INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"),
+
+ /* dEQP-GLES3.functional.ubo.random.basic_arrays.0 */
+ INSTR_6XX(c7020020_01800000, "stc.f32 c[32], r0.x, 1"), /* stc c[32], r0.x, 1 */
+ /* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */
+ INSTR_6XX(c7060020_03800000, "stc.u32 c[32], r0.x, 3"), /* stc c[32], r0.x, 3 */
+
+ /* custom */
+ INSTR_6XX(c7060100_03800000, "stc.u32 c[a1.x], r0.x, 3"), /* stc c[a1.x], r0.x, 3 */
+ INSTR_6XX(c7060120_03800000, "stc.u32 c[a1.x+32], r0.x, 3"), /* stc c[a1.x+32], r0.x, 3 */
+
+ /* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */
+ INSTR_6XX(c0260200_03676100, "stib.b.untyped.1d.u32.3.imm.base0 r0.x, r0.w, 1"), /* stib.untyped.u32.1d.3.mode4.base0 r0.x, r0.w, 1 */
+
+ INSTR_6XX(c0240402_00674100, "stib.b.untyped.1d.u16.1.imm.base0 hr0.z, r0.x, 2"),
#if 0
/* TODO blob sometimes/frequently sets b0, although there does not seem
* to be an obvious pattern and our encoding never sets it. AFAICT it
@@ -298,6 +332,13 @@ static const struct test {
INSTR_6XX(c0260000_00c78080, "ldc.offset0.1.nonuniform r0.x, 0, r0.x"), /* ldc.1.mode2.base0 r0.x, 0, r0.x */
INSTR_6XX(c0260201_00c78080, "ldc.offset0.1.nonuniform r0.y, 0, r0.y"), /* ldc.1.mode2.base0 r0.y, 0, r0.y */
+ /* a4xx-a5xx has the exact same instrs in
+ * dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.(dynamically_)uniform_fragment
+ * with no change based on the mode. Note that we can't decode this yet.
+ */
+ /* INSTR_4XX(c7860000_00810001), */ /* ldc.1 r0.x, g[r1.x], 0, r0.x */
+ /* INSTR_5XX(c7860000_00800000), */ /* ldc.a.1 r0.x, g[r0.x], 0, r0.x */
+
/* custom */
INSTR_6XX(c0260201_ffc78080, "ldc.offset0.1.nonuniform r0.y, 255, r0.y"), /* ldc.1.mode2.base0 r0.y, 255, r0.y */
@@ -307,6 +348,11 @@ static const struct test {
INSTR_6XX(c0260000_00478400, "ldc.offset2.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
INSTR_6XX(c0260000_00478600, "ldc.offset3.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
+ /* dEQP-VK.glsl.conditionals.if.if_else_vertex */
+ INSTR_6XX(c0360000_00c78100, "ldc.1.k.imm.base0 c[a1.x], 0, 0"), /* ldc.1.k.mode4.base0 c[a1.x], 0, 0 */
+ /* custom */
+ INSTR_6XX(c0360003_00c78100, "ldc.4.k.imm.base0 c[a1.x], 0, 0"), /* ldc.4.k.mode4.base0 c[a1.x], 0, 0 */
+
/* dEQP-VK.glsl.struct.local.nested_struct_array_dynamic_index_fragment */
INSTR_6XX(c1425b50_01803e02, "stp.f32 p[r11.y-176], r0.y, 1"),
INSTR_6XX(c1425b98_02803e14, "stp.f32 p[r11.y-104], r2.z, 2"),
@@ -318,14 +364,17 @@ static const struct test {
/* Atomic: */
#if 0
/* TODO our encoding differs in b53 for these two */
- INSTR_5XX(c4d60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
- INSTR_5XX(c4160205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
+ INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
+ INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
#else
- INSTR_5XX(c4f60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
- INSTR_5XX(c4360205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
+ INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
+ INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
#endif
INSTR_6XX(d5c60003_03008001, "(sy)atomic.max.untyped.1d.u32.1.l r0.w, l[r0.z], r0.w"),
+ /* dEQP-VK.glsl.atomic_operations.add_unsigned_compute_reference */
+ INSTR_6XX(c4160002_02000001, "atomic.g.add.untyped.1d.u32.1.g r0.z, r0.x, r0.z"),
+
/* Bindless atomic: */
INSTR_6XX(c03a0003_01640000, "atomic.b.add.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.add.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
INSTR_6XX(c03a0003_01660000, "atomic.b.and.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.and.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
@@ -333,10 +382,14 @@ static const struct test {
/* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_literal.fragment.sampler2d */
INSTR_6XX(a0c01f04_0cc00005, "sam (f32)(xyzw)r1.x, r0.z, s#6, t#6"),
- /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.uniform.fragment.sampler2d (looks like maybe the compiler didn't figure out */
- INSTR_6XX(a0c81f07_0100000b, "sam.s2en (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode0 (f32)(xyzw)r1.w, r1.y, hr2.x */
+
+ /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.uniform.fragment.sampler2d */
+ INSTR_4XX(a0c81f02_00800001, "sam.s2en.uniform (f32)(xyzw)r0.z, r0.x, hr1.x"), /* sam.s2en.mode0 (f32)(xyzw)r0.z, r0.x, hr1.x */ /* same for 5xx */
+ INSTR_6XX(a0c81f07_0100000b, "sam.s2en.uniform (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode0 (f32)(xyzw)r1.w, r1.y, hr2.x */
+
/* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.dynamically_uniform.fragment.sampler2d */
- INSTR_6XX(a0c81f07_8100000b, "sam.s2en.uniform (f32)(xyzw)r1.w, r1.y, hr2.x", .parse_fail=true), /* sam.s2en.mode4 (f32)(xyzw)r1.w, r1.y, hr2.x */
+ INSTR_4XX(a0c81f02_80800001, "sam.s2en.nonuniform (f32)(xyzw)r0.z, r0.x, hr1.x"), /* sam.s2en.uniform (f32)(xyzw)r0.z, r0.x, hr1.x */ /* same for 5xx */
+ INSTR_6XX(a0c81f07_8100000b, "sam.s2en.nonuniform (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode4 (f32)(xyzw)r1.w, r1.y, hr2.x */
/* NonUniform: */
/* dEQP-VK.descriptor_indexing.storage_buffer */
@@ -349,6 +402,9 @@ static const struct test {
/* dEQP-VK.descriptor_indexing.sampler */
INSTR_6XX(a0c81f00_40000005, "sam.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.x"),
+ /* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_int */
+ INSTR_6XX(c0260001_00c98000, "getfiberid.u32 r0.y"),
+
/* Custom test since we've never seen the blob emit these. */
INSTR_6XX(c0260004_00490000, "getspid.u32 r1.x"),
INSTR_6XX(c0260005_00494000, "getwid.u32 r1.y"),
@@ -416,7 +472,6 @@ main(int argc, char **argv)
printf(" Got: \"%s\"\n", disasm_output);
retval = 1;
decode_fails++;
- continue;
}
/*
@@ -426,7 +481,8 @@ main(int argc, char **argv)
unsigned gen = test->gpu_id / 100;
if (!compilers[gen]) {
dev_ids[gen].gpu_id = test->gpu_id;
- compilers[gen] = ir3_compiler_create(NULL, &dev_ids[gen], false);
+ compilers[gen] = ir3_compiler_create(NULL, &dev_ids[gen],
+ &(struct ir3_compiler_options){});
}
FILE *fasm =
diff --git a/lib/mesa/src/freedreno/registers/dsi/dsi_phy_5nm.xml b/lib/mesa/src/freedreno/registers/dsi/dsi_phy_5nm.xml
deleted file mode 100644
index 7e3505b9e..000000000
--- a/lib/mesa/src/freedreno/registers/dsi/dsi_phy_5nm.xml
+++ /dev/null
@@ -1,228 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<database xmlns="http://nouveau.freedesktop.org/"
-xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd">
-<import file="freedreno_copyright.xml"/>
-
-<domain name="DSI_5nm_PHY_CMN" width="32">
- <reg32 offset="0x00000" name="REVISION_ID0"/>
- <reg32 offset="0x00004" name="REVISION_ID1"/>
- <reg32 offset="0x00008" name="REVISION_ID2"/>
- <reg32 offset="0x0000c" name="REVISION_ID3"/>
- <reg32 offset="0x00010" name="CLK_CFG0"/>
- <reg32 offset="0x00014" name="CLK_CFG1"/>
- <reg32 offset="0x00018" name="GLBL_CTRL"/>
- <reg32 offset="0x0001c" name="RBUF_CTRL"/>
- <reg32 offset="0x00020" name="VREG_CTRL_0"/>
- <reg32 offset="0x00024" name="CTRL_0"/>
- <reg32 offset="0x00028" name="CTRL_1"/>
- <reg32 offset="0x0002c" name="CTRL_2"/>
- <reg32 offset="0x00030" name="CTRL_3"/>
- <reg32 offset="0x00034" name="LANE_CFG0"/>
- <reg32 offset="0x00038" name="LANE_CFG1"/>
- <reg32 offset="0x0003c" name="PLL_CNTRL"/>
- <reg32 offset="0x00040" name="DPHY_SOT"/>
- <reg32 offset="0x000a0" name="LANE_CTRL0"/>
- <reg32 offset="0x000a4" name="LANE_CTRL1"/>
- <reg32 offset="0x000a8" name="LANE_CTRL2"/>
- <reg32 offset="0x000ac" name="LANE_CTRL3"/>
- <reg32 offset="0x000b0" name="LANE_CTRL4"/>
- <reg32 offset="0x000b4" name="TIMING_CTRL_0"/>
- <reg32 offset="0x000b8" name="TIMING_CTRL_1"/>
- <reg32 offset="0x000bc" name="TIMING_CTRL_2"/>
- <reg32 offset="0x000c0" name="TIMING_CTRL_3"/>
- <reg32 offset="0x000c4" name="TIMING_CTRL_4"/>
- <reg32 offset="0x000c8" name="TIMING_CTRL_5"/>
- <reg32 offset="0x000cc" name="TIMING_CTRL_6"/>
- <reg32 offset="0x000d0" name="TIMING_CTRL_7"/>
- <reg32 offset="0x000d4" name="TIMING_CTRL_8"/>
- <reg32 offset="0x000d8" name="TIMING_CTRL_9"/>
- <reg32 offset="0x000dc" name="TIMING_CTRL_10"/>
- <reg32 offset="0x000e0" name="TIMING_CTRL_11"/>
- <reg32 offset="0x000e4" name="TIMING_CTRL_12"/>
- <reg32 offset="0x000e8" name="TIMING_CTRL_13"/>
- <reg32 offset="0x000ec" name="GLBL_HSTX_STR_CTRL_0"/>
- <reg32 offset="0x000f0" name="GLBL_HSTX_STR_CTRL_1"/>
- <reg32 offset="0x000f4" name="GLBL_RESCODE_OFFSET_TOP_CTRL"/>
- <reg32 offset="0x000f8" name="GLBL_RESCODE_OFFSET_BOT_CTRL"/>
- <reg32 offset="0x000fc" name="GLBL_RESCODE_OFFSET_MID_CTRL"/>
- <reg32 offset="0x00100" name="GLBL_LPTX_STR_CTRL"/>
- <reg32 offset="0x00104" name="GLBL_PEMPH_CTRL_0"/>
- <reg32 offset="0x00108" name="GLBL_PEMPH_CTRL_1"/>
- <reg32 offset="0x0010c" name="GLBL_STR_SWI_CAL_SEL_CTRL"/>
- <reg32 offset="0x00110" name="VREG_CTRL_1"/>
- <reg32 offset="0x00114" name="CTRL_4"/>
- <reg32 offset="0x00140" name="PHY_STATUS"/>
- <reg32 offset="0x00148" name="LANE_STATUS0"/>
- <reg32 offset="0x0014c" name="LANE_STATUS1"/>
-</domain>
-
-<domain name="DSI_5nm_PHY" width="32">
- <array offset="0x00000" name="LN" length="5" stride="0x80">
- <reg32 offset="0x00" name="CFG0"/>
- <reg32 offset="0x04" name="CFG1"/>
- <reg32 offset="0x08" name="CFG2"/>
- <reg32 offset="0x0c" name="TEST_DATAPATH"/>
- <reg32 offset="0x10" name="PIN_SWAP"/>
- <reg32 offset="0x14" name="LPRX_CTRL"/>
- <reg32 offset="0x18" name="TX_DCTRL"/>
- </array>
-</domain>
-
-<domain name="DSI_5nm_PHY_PLL" width="32">
- <reg32 offset="0x0000" name="ANALOG_CONTROLS_ONE"/>
- <reg32 offset="0x0004" name="ANALOG_CONTROLS_TWO"/>
- <reg32 offset="0x0008" name="INT_LOOP_SETTINGS"/>
- <reg32 offset="0x000c" name="INT_LOOP_SETTINGS_TWO"/>
- <reg32 offset="0x0010" name="ANALOG_CONTROLS_THREE"/>
- <reg32 offset="0x0014" name="ANALOG_CONTROLS_FOUR"/>
- <reg32 offset="0x0018" name="ANALOG_CONTROLS_FIVE"/>
- <reg32 offset="0x001c" name="INT_LOOP_CONTROLS"/>
- <reg32 offset="0x0020" name="DSM_DIVIDER"/>
- <reg32 offset="0x0024" name="FEEDBACK_DIVIDER"/>
- <reg32 offset="0x0028" name="SYSTEM_MUXES"/>
- <reg32 offset="0x002c" name="FREQ_UPDATE_CONTROL_OVERRIDES"/>
- <reg32 offset="0x0030" name="CMODE"/>
- <reg32 offset="0x0034" name="PSM_CTRL"/>
- <reg32 offset="0x0038" name="RSM_CTRL"/>
- <reg32 offset="0x003c" name="VCO_TUNE_MAP"/>
- <reg32 offset="0x0040" name="PLL_CNTRL"/>
- <reg32 offset="0x0044" name="CALIBRATION_SETTINGS"/>
- <reg32 offset="0x0048" name="BAND_SEL_CAL_TIMER_LOW"/>
- <reg32 offset="0x004c" name="BAND_SEL_CAL_TIMER_HIGH"/>
- <reg32 offset="0x0050" name="BAND_SEL_CAL_SETTINGS"/>
- <reg32 offset="0x0054" name="BAND_SEL_MIN"/>
- <reg32 offset="0x0058" name="BAND_SEL_MAX"/>
- <reg32 offset="0x005c" name="BAND_SEL_PFILT"/>
- <reg32 offset="0x0060" name="BAND_SEL_IFILT"/>
- <reg32 offset="0x0064" name="BAND_SEL_CAL_SETTINGS_TWO"/>
- <reg32 offset="0x0068" name="BAND_SEL_CAL_SETTINGS_THREE"/>
- <reg32 offset="0x006c" name="BAND_SEL_CAL_SETTINGS_FOUR"/>
- <reg32 offset="0x0070" name="BAND_SEL_ICODE_HIGH"/>
- <reg32 offset="0x0074" name="BAND_SEL_ICODE_LOW"/>
- <reg32 offset="0x0078" name="FREQ_DETECT_SETTINGS_ONE"/>
- <reg32 offset="0x007c" name="FREQ_DETECT_THRESH"/>
- <reg32 offset="0x0080" name="FREQ_DET_REFCLK_HIGH"/>
- <reg32 offset="0x0084" name="FREQ_DET_REFCLK_LOW"/>
- <reg32 offset="0x0088" name="FREQ_DET_PLLCLK_HIGH"/>
- <reg32 offset="0x008c" name="FREQ_DET_PLLCLK_LOW"/>
- <reg32 offset="0x0090" name="PFILT"/>
- <reg32 offset="0x0094" name="IFILT"/>
- <reg32 offset="0x0098" name="PLL_GAIN"/>
- <reg32 offset="0x009c" name="ICODE_LOW"/>
- <reg32 offset="0x00a0" name="ICODE_HIGH"/>
- <reg32 offset="0x00a4" name="LOCKDET"/>
- <reg32 offset="0x00a8" name="OUTDIV"/>
- <reg32 offset="0x00ac" name="FASTLOCK_CONTROL"/>
- <reg32 offset="0x00b0" name="PASS_OUT_OVERRIDE_ONE"/>
- <reg32 offset="0x00b4" name="PASS_OUT_OVERRIDE_TWO"/>
- <reg32 offset="0x00b8" name="CORE_OVERRIDE"/>
- <reg32 offset="0x00bc" name="CORE_INPUT_OVERRIDE"/>
- <reg32 offset="0x00c0" name="RATE_CHANGE"/>
- <reg32 offset="0x00c4" name="PLL_DIGITAL_TIMERS"/>
- <reg32 offset="0x00c8" name="PLL_DIGITAL_TIMERS_TWO"/>
- <reg32 offset="0x00cc" name="DECIMAL_DIV_START"/>
- <reg32 offset="0x00d0" name="FRAC_DIV_START_LOW"/>
- <reg32 offset="0x00d4" name="FRAC_DIV_START_MID"/>
- <reg32 offset="0x00d8" name="FRAC_DIV_START_HIGH"/>
- <reg32 offset="0x00dc" name="DEC_FRAC_MUXES"/>
- <reg32 offset="0x00e0" name="DECIMAL_DIV_START_1"/>
- <reg32 offset="0x00e4" name="FRAC_DIV_START_LOW_1"/>
- <reg32 offset="0x00e8" name="FRAC_DIV_START_MID_1"/>
- <reg32 offset="0x00ec" name="FRAC_DIV_START_HIGH_1"/>
- <reg32 offset="0x00f0" name="DECIMAL_DIV_START_2"/>
- <reg32 offset="0x00f4" name="FRAC_DIV_START_LOW_2"/>
- <reg32 offset="0x00f8" name="FRAC_DIV_START_MID_2"/>
- <reg32 offset="0x00fc" name="FRAC_DIV_START_HIGH_2"/>
- <reg32 offset="0x0100" name="MASH_CONTROL"/>
- <reg32 offset="0x0104" name="SSC_STEPSIZE_LOW"/>
- <reg32 offset="0x0108" name="SSC_STEPSIZE_HIGH"/>
- <reg32 offset="0x010c" name="SSC_DIV_PER_LOW"/>
- <reg32 offset="0x0110" name="SSC_DIV_PER_HIGH"/>
- <reg32 offset="0x0114" name="SSC_ADJPER_LOW"/>
- <reg32 offset="0x0118" name="SSC_ADJPER_HIGH"/>
- <reg32 offset="0x011c" name="SSC_MUX_CONTROL"/>
- <reg32 offset="0x0120" name="SSC_STEPSIZE_LOW_1"/>
- <reg32 offset="0x0124" name="SSC_STEPSIZE_HIGH_1"/>
- <reg32 offset="0x0128" name="SSC_DIV_PER_LOW_1"/>
- <reg32 offset="0x012c" name="SSC_DIV_PER_HIGH_1"/>
- <reg32 offset="0x0130" name="SSC_ADJPER_LOW_1"/>
- <reg32 offset="0x0134" name="SSC_ADJPER_HIGH_1"/>
- <reg32 offset="0x0138" name="SSC_STEPSIZE_LOW_2"/>
- <reg32 offset="0x013c" name="SSC_STEPSIZE_HIGH_2"/>
- <reg32 offset="0x0140" name="SSC_DIV_PER_LOW_2"/>
- <reg32 offset="0x0144" name="SSC_DIV_PER_HIGH_2"/>
- <reg32 offset="0x0148" name="SSC_ADJPER_LOW_2"/>
- <reg32 offset="0x014c" name="SSC_ADJPER_HIGH_2"/>
- <reg32 offset="0x0150" name="SSC_CONTROL"/>
- <reg32 offset="0x0154" name="PLL_OUTDIV_RATE"/>
- <reg32 offset="0x0158" name="PLL_LOCKDET_RATE_1"/>
- <reg32 offset="0x015c" name="PLL_LOCKDET_RATE_2"/>
- <reg32 offset="0x0160" name="PLL_PROP_GAIN_RATE_1"/>
- <reg32 offset="0x0164" name="PLL_PROP_GAIN_RATE_2"/>
- <reg32 offset="0x0168" name="PLL_BAND_SEL_RATE_1"/>
- <reg32 offset="0x016c" name="PLL_BAND_SEL_RATE_2"/>
- <reg32 offset="0x0170" name="PLL_INT_GAIN_IFILT_BAND_1"/>
- <reg32 offset="0x0174" name="PLL_INT_GAIN_IFILT_BAND_2"/>
- <reg32 offset="0x0178" name="PLL_FL_INT_GAIN_PFILT_BAND_1"/>
- <reg32 offset="0x017c" name="PLL_FL_INT_GAIN_PFILT_BAND_2"/>
- <reg32 offset="0x0180" name="PLL_FASTLOCK_EN_BAND"/>
- <reg32 offset="0x0184" name="FREQ_TUNE_ACCUM_INIT_MID"/>
- <reg32 offset="0x0188" name="FREQ_TUNE_ACCUM_INIT_HIGH"/>
- <reg32 offset="0x018c" name="FREQ_TUNE_ACCUM_INIT_MUX"/>
- <reg32 offset="0x0190" name="PLL_LOCK_OVERRIDE"/>
- <reg32 offset="0x0194" name="PLL_LOCK_DELAY"/>
- <reg32 offset="0x0198" name="PLL_LOCK_MIN_DELAY"/>
- <reg32 offset="0x019c" name="CLOCK_INVERTERS"/>
- <reg32 offset="0x01a0" name="SPARE_AND_JPC_OVERRIDES"/>
- <reg32 offset="0x01a4" name="BIAS_CONTROL_1"/>
- <reg32 offset="0x01a8" name="BIAS_CONTROL_2"/>
- <reg32 offset="0x01ac" name="ALOG_OBSV_BUS_CTRL_1"/>
- <reg32 offset="0x01b0" name="COMMON_STATUS_ONE"/>
- <reg32 offset="0x01b4" name="COMMON_STATUS_TWO"/>
- <reg32 offset="0x01b8" name="BAND_SEL_CAL"/>
- <reg32 offset="0x01bc" name="ICODE_ACCUM_STATUS_LOW"/>
- <reg32 offset="0x01c0" name="ICODE_ACCUM_STATUS_HIGH"/>
- <reg32 offset="0x01c4" name="FD_OUT_LOW"/>
- <reg32 offset="0x01c8" name="FD_OUT_HIGH"/>
- <reg32 offset="0x01cc" name="ALOG_OBSV_BUS_STATUS_1"/>
- <reg32 offset="0x01d0" name="PLL_MISC_CONFIG"/>
- <reg32 offset="0x01d4" name="FLL_CONFIG"/>
- <reg32 offset="0x01d8" name="FLL_FREQ_ACQ_TIME"/>
- <reg32 offset="0x01dc" name="FLL_CODE0"/>
- <reg32 offset="0x01e0" name="FLL_CODE1"/>
- <reg32 offset="0x01e4" name="FLL_GAIN0"/>
- <reg32 offset="0x01e8" name="FLL_GAIN1"/>
- <reg32 offset="0x01ec" name="SW_RESET"/>
- <reg32 offset="0x01f0" name="FAST_PWRUP"/>
- <reg32 offset="0x01f4" name="LOCKTIME0"/>
- <reg32 offset="0x01f8" name="LOCKTIME1"/>
- <reg32 offset="0x01fc" name="DEBUG_BUS_SEL"/>
- <reg32 offset="0x0200" name="DEBUG_BUS0"/>
- <reg32 offset="0x0204" name="DEBUG_BUS1"/>
- <reg32 offset="0x0208" name="DEBUG_BUS2"/>
- <reg32 offset="0x020c" name="DEBUG_BUS3"/>
- <reg32 offset="0x0210" name="ANALOG_FLL_CONTROL_OVERRIDES"/>
- <reg32 offset="0x0214" name="VCO_CONFIG"/>
- <reg32 offset="0x0218" name="VCO_CAL_CODE1_MODE0_STATUS"/>
- <reg32 offset="0x021c" name="VCO_CAL_CODE1_MODE1_STATUS"/>
- <reg32 offset="0x0220" name="RESET_SM_STATUS"/>
- <reg32 offset="0x0224" name="TDC_OFFSET"/>
- <reg32 offset="0x0228" name="PS3_PWRDOWN_CONTROLS"/>
- <reg32 offset="0x022c" name="PS4_PWRDOWN_CONTROLS"/>
- <reg32 offset="0x0230" name="PLL_RST_CONTROLS"/>
- <reg32 offset="0x0234" name="GEAR_BAND_SELECT_CONTROLS"/>
- <reg32 offset="0x0238" name="PSM_CLK_CONTROLS"/>
- <reg32 offset="0x023c" name="SYSTEM_MUXES_2"/>
- <reg32 offset="0x0240" name="VCO_CONFIG_1"/>
- <reg32 offset="0x0244" name="VCO_CONFIG_2"/>
- <reg32 offset="0x0248" name="CLOCK_INVERTERS_1"/>
- <reg32 offset="0x024c" name="CLOCK_INVERTERS_2"/>
- <reg32 offset="0x0250" name="CMODE_1"/>
- <reg32 offset="0x0254" name="CMODE_2"/>
- <reg32 offset="0x0258" name="ANALOG_CONTROLS_FIVE_1"/>
- <reg32 offset="0x025c" name="ANALOG_CONTROLS_FIVE_2"/>
- <reg32 offset="0x0260" name="PERF_OPTIMIZE"/>
-</domain>
-
-</database>
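The <reg32> entries removed above are rnndb register descriptions: each pairs a byte offset with a register name inside the DSI PHY PLL domain. As a hedged sketch of how such an entry is typically consumed from C (the macro names and the write helper below are illustrative assumptions, not the actual generated headers):

#include <stdint.h>

/* Hypothetical offset macros, copied from two of the <reg32> entries above. */
#define DSI_PHY_5NM_PLL_SSC_CONTROL   0x0150
#define DSI_PHY_5NM_PLL_OUTDIV_RATE   0x0154

/* Illustrative MMIO write: the XML "offset" attribute is a byte offset
 * from the PLL block's base address. */
static inline void
pll_write(volatile uint32_t *pll_base, uint32_t offset, uint32_t val)
{
   pll_base[offset / sizeof(uint32_t)] = val;
}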
diff --git a/lib/mesa/src/freedreno/vulkan/tu_clear_blit.c b/lib/mesa/src/freedreno/vulkan/tu_clear_blit.c
index 8d38a8fd0..6caa31beb 100644
--- a/lib/mesa/src/freedreno/vulkan/tu_clear_blit.c
+++ b/lib/mesa/src/freedreno/vulkan/tu_clear_blit.c
@@ -30,27 +30,27 @@ tu_pack_float32_for_unorm(float val, int bits)
/* r2d_ = BLIT_OP_SCALE operations */
static enum a6xx_2d_ifmt
-format_to_ifmt(VkFormat format)
+format_to_ifmt(enum pipe_format format)
{
- if (format == VK_FORMAT_D24_UNORM_S8_UINT ||
- format == VK_FORMAT_X8_D24_UNORM_PACK32)
+ if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
+ format == PIPE_FORMAT_Z24X8_UNORM)
return R2D_UNORM8;
/* get_component_bits doesn't work with depth/stencil formats: */
- if (format == VK_FORMAT_D16_UNORM || format == VK_FORMAT_D32_SFLOAT)
+ if (format == PIPE_FORMAT_Z16_UNORM || format == PIPE_FORMAT_Z32_FLOAT)
return R2D_FLOAT32;
- if (format == VK_FORMAT_S8_UINT)
+ if (format == PIPE_FORMAT_S8_UINT)
return R2D_INT8;
/* use the size of the red channel to find the corresponding "ifmt" */
- bool is_int = vk_format_is_int(format);
- switch (vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) {
+ bool is_int = util_format_is_pure_integer(format);
+ switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) {
case 4: case 5: case 8:
return is_int ? R2D_INT8 : R2D_UNORM8;
case 10: case 11:
return is_int ? R2D_INT16 : R2D_FLOAT16;
case 16:
- if (vk_format_is_float(format))
+ if (util_format_is_float(format))
return R2D_FLOAT16;
return is_int ? R2D_INT16 : R2D_FLOAT32;
case 32:
@@ -82,38 +82,38 @@ r2d_coords(struct tu_cs *cs,
}
static void
-r2d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
+r2d_clear_value(struct tu_cs *cs, enum pipe_format format, const VkClearValue *val)
{
uint32_t clear_value[4] = {};
switch (format) {
- case VK_FORMAT_X8_D24_UNORM_PACK32:
- case VK_FORMAT_D24_UNORM_S8_UINT:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_Z24X8_UNORM:
/* cleared as r8g8b8a8_unorm using special format */
clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24);
clear_value[1] = clear_value[0] >> 8;
clear_value[2] = clear_value[0] >> 16;
clear_value[3] = val->depthStencil.stencil;
break;
- case VK_FORMAT_D16_UNORM:
- case VK_FORMAT_D32_SFLOAT:
+ case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_Z32_FLOAT:
/* R2D_FLOAT32 */
clear_value[0] = fui(val->depthStencil.depth);
break;
- case VK_FORMAT_S8_UINT:
+ case PIPE_FORMAT_S8_UINT:
clear_value[0] = val->depthStencil.stencil;
break;
- case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
+ case PIPE_FORMAT_R9G9B9E5_FLOAT:
/* cleared as UINT32 */
clear_value[0] = float3_to_rgb9e5(val->color.float32);
break;
default:
- assert(!vk_format_is_depth_or_stencil(format));
- const struct util_format_description *desc = vk_format_description(format);
+ assert(!util_format_is_depth_or_stencil(format));
+ const struct util_format_description *desc = util_format_description(format);
enum a6xx_2d_ifmt ifmt = format_to_ifmt(format);
assert(desc && (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ||
- format == VK_FORMAT_B10G11R11_UFLOAT_PACK32));
+ format == PIPE_FORMAT_R11G11B10_FLOAT));
for (unsigned i = 0; i < desc->nr_channels; i++) {
const struct util_format_channel_description *ch = &desc->channel[i];
@@ -144,7 +144,7 @@ r2d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
static void
r2d_src(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
- const struct tu_image_view *iview,
+ const struct fdl6_view *iview,
uint32_t layer,
VkFilter filter)
{
@@ -162,6 +162,24 @@ r2d_src(struct tu_cmd_buffer *cmd,
}
static void
+r2d_src_depth(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ const struct tu_image_view *iview,
+ uint32_t layer,
+ VkFilter filter)
+{
+ tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5);
+ tu_cs_emit(cs, tu_image_view_depth(iview, SP_PS_2D_SRC_INFO));
+ tu_cs_emit(cs, iview->view.SP_PS_2D_SRC_SIZE);
+ tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer);
+ /* SP_PS_2D_SRC_PITCH has shifted pitch field */
+ tu_cs_emit(cs, iview->depth_PITCH << 9);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS, 3);
+ tu_cs_image_flag_ref(cs, &iview->view, layer);
+}
+
+static void
r2d_src_stencil(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
const struct tu_image_view *iview,
@@ -170,7 +188,7 @@ r2d_src_stencil(struct tu_cmd_buffer *cmd,
{
tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5);
tu_cs_emit(cs, tu_image_view_stencil(iview, SP_PS_2D_SRC_INFO) & ~A6XX_SP_PS_2D_SRC_INFO_FLAGS);
- tu_cs_emit(cs, iview->SP_PS_2D_SRC_SIZE);
+ tu_cs_emit(cs, iview->view.SP_PS_2D_SRC_SIZE);
tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
/* SP_PS_2D_SRC_PITCH has shifted pitch field */
tu_cs_emit(cs, iview->stencil_PITCH << 9);
@@ -179,17 +197,17 @@ r2d_src_stencil(struct tu_cmd_buffer *cmd,
static void
r2d_src_buffer(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
- VkFormat vk_format,
+ enum pipe_format format,
uint64_t va, uint32_t pitch,
uint32_t width, uint32_t height)
{
- struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR);
+ struct tu_native_format fmt = tu6_format_texture(format, TILE6_LINEAR);
tu_cs_emit_regs(cs,
A6XX_SP_PS_2D_SRC_INFO(
- .color_format = format.fmt,
- .color_swap = format.swap,
- .srgb = vk_format_is_srgb(vk_format),
+ .color_format = fmt.fmt,
+ .color_swap = fmt.swap,
+ .srgb = util_format_is_srgb(format),
.unk20 = 1,
.unk22 = 1),
A6XX_SP_PS_2D_SRC_SIZE(.width = width, .height = height),
@@ -198,7 +216,7 @@ r2d_src_buffer(struct tu_cmd_buffer *cmd,
}
static void
-r2d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
+r2d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer)
{
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
tu_cs_emit(cs, iview->RB_2D_DST_INFO);
@@ -209,6 +227,18 @@ r2d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
}
static void
+r2d_dst_depth(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
+{
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
+ tu_cs_emit(cs, tu_image_view_depth(iview, RB_2D_DST_INFO));
+ tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer);
+ tu_cs_emit(cs, iview->depth_PITCH);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS, 3);
+ tu_cs_image_flag_ref(cs, &iview->view, layer);
+}
+
+static void
r2d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
{
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
@@ -218,15 +248,15 @@ r2d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t la
}
static void
-r2d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
+r2d_dst_buffer(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch)
{
- struct tu_native_format format = tu6_format_color(vk_format, TILE6_LINEAR);
+ struct tu_native_format fmt = tu6_format_color(format, TILE6_LINEAR);
tu_cs_emit_regs(cs,
A6XX_RB_2D_DST_INFO(
- .color_format = format.fmt,
- .color_swap = format.swap,
- .srgb = vk_format_is_srgb(vk_format)),
+ .color_format = fmt.fmt,
+ .color_swap = fmt.swap,
+ .srgb = util_format_is_srgb(format)),
A6XX_RB_2D_DST(.qword = va),
A6XX_RB_2D_DST_PITCH(pitch));
}
@@ -234,24 +264,25 @@ r2d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch
static void
r2d_setup_common(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
- VkFormat vk_format,
+ enum pipe_format format,
VkImageAspectFlags aspect_mask,
unsigned blit_param,
bool clear,
bool ubwc,
bool scissor)
{
- enum a6xx_format format = tu6_base_format(vk_format);
- enum a6xx_2d_ifmt ifmt = format_to_ifmt(vk_format);
+ enum a6xx_format fmt = tu6_base_format(format);
+ enum a6xx_2d_ifmt ifmt = format_to_ifmt(format);
+
uint32_t unknown_8c01 = 0;
- if ((vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
- vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) && ubwc) {
- format = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
+ if ((format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
+ format == PIPE_FORMAT_Z24X8_UNORM) && ubwc) {
+ fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
}
/* note: the only format with partial clearing is D24S8 */
- if (vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+ if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
/* preserve stencil channel */
if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
unknown_8c01 = 0x08000041;
@@ -267,10 +298,10 @@ r2d_setup_common(struct tu_cmd_buffer *cmd,
.scissor = scissor,
.rotate = blit_param,
.solid_color = clear,
- .d24s8 = format == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear,
- .color_format = format,
+ .d24s8 = fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear,
+ .color_format = fmt,
.mask = 0xf,
- .ifmt = vk_format_is_srgb(vk_format) ? R2D_UNORM8_SRGB : ifmt,
+ .ifmt = util_format_is_srgb(format) ? R2D_UNORM8_SRGB : ifmt,
).value;
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
@@ -279,21 +310,21 @@ r2d_setup_common(struct tu_cmd_buffer *cmd,
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
tu_cs_emit(cs, blit_cntl);
- if (format == FMT6_10_10_10_2_UNORM_DEST)
- format = FMT6_16_16_16_16_FLOAT;
+ if (fmt == FMT6_10_10_10_2_UNORM_DEST)
+ fmt = FMT6_16_16_16_16_FLOAT;
tu_cs_emit_regs(cs, A6XX_SP_2D_DST_FORMAT(
- .sint = vk_format_is_sint(vk_format),
- .uint = vk_format_is_uint(vk_format),
- .color_format = format,
- .srgb = vk_format_is_srgb(vk_format),
+ .sint = util_format_is_pure_sint(format),
+ .uint = util_format_is_pure_uint(format),
+ .color_format = fmt,
+ .srgb = util_format_is_srgb(format),
.mask = 0xf));
}
static void
r2d_setup(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
- VkFormat vk_format,
+ enum pipe_format format,
VkImageAspectFlags aspect_mask,
unsigned blit_param,
bool clear,
@@ -302,9 +333,11 @@ r2d_setup(struct tu_cmd_buffer *cmd,
{
assert(samples == VK_SAMPLE_COUNT_1_BIT);
- tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
+ if (!cmd->state.pass) {
+ tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
+ }
- r2d_setup_common(cmd, cs, vk_format, aspect_mask, blit_param, clear, ubwc, false);
+ r2d_setup_common(cmd, cs, format, aspect_mask, blit_param, clear, ubwc, false);
}
static void
@@ -546,21 +579,25 @@ compile_shader(struct tu_device *dev, struct nir_shader *nir,
ir3_finalize_nir(dev->compiler, nir);
- struct ir3_shader *sh = ir3_shader_from_nir(dev->compiler, nir,
- align(consts, 4), NULL);
+ struct ir3_shader *sh =
+ ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) {
+ .api_wavesize = IR3_SINGLE_OR_DOUBLE,
+ .real_wavesize = IR3_SINGLE_OR_DOUBLE,
+ .reserved_user_consts = align(consts, 4),
+ }, NULL);
struct ir3_shader_key key = {};
bool created;
struct ir3_shader_variant *so =
ir3_shader_get_variant(sh, &key, false, false, &created);
- struct tu6_global *global = dev->global_bo.map;
+ struct tu6_global *global = dev->global_bo->map;
assert(*offset + so->info.sizedwords <= ARRAY_SIZE(global->shaders));
dev->global_shaders[idx] = so;
memcpy(&global->shaders[*offset], so->bin,
sizeof(uint32_t) * so->info.sizedwords);
- dev->global_shader_va[idx] = dev->global_bo.iova +
+ dev->global_shader_va[idx] = dev->global_bo->iova +
gb_offset(shaders[*offset]);
*offset += align(so->info.sizedwords, 32);
}
@@ -749,7 +786,7 @@ r3d_coords(struct tu_cs *cs,
}
static void
-r3d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
+r3d_clear_value(struct tu_cs *cs, enum pipe_format format, const VkClearValue *val)
{
tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4);
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
@@ -760,8 +797,8 @@ r3d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
switch (format) {
- case VK_FORMAT_X8_D24_UNORM_PACK32:
- case VK_FORMAT_D24_UNORM_S8_UINT: {
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT: {
/* cleared as r8g8b8a8_unorm using special format */
uint32_t tmp = tu_pack_float32_for_unorm(val->depthStencil.depth, 24);
tu_cs_emit(cs, fui((tmp & 0xff) / 255.0f));
@@ -769,14 +806,14 @@ r3d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
tu_cs_emit(cs, fui((tmp >> 16 & 0xff) / 255.0f));
tu_cs_emit(cs, fui((val->depthStencil.stencil & 0xff) / 255.0f));
} break;
- case VK_FORMAT_D16_UNORM:
- case VK_FORMAT_D32_SFLOAT:
+ case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_Z32_FLOAT:
tu_cs_emit(cs, fui(val->depthStencil.depth));
tu_cs_emit(cs, 0);
tu_cs_emit(cs, 0);
tu_cs_emit(cs, 0);
break;
- case VK_FORMAT_S8_UINT:
+ case PIPE_FORMAT_S8_UINT:
tu_cs_emit(cs, val->depthStencil.stencil & 0xff);
tu_cs_emit(cs, 0);
tu_cs_emit(cs, 0);
@@ -784,7 +821,7 @@ r3d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
break;
default:
/* as color formats use clear value as-is */
- assert(!vk_format_is_depth_or_stencil(format));
+ assert(!util_format_is_depth_or_stencil(format));
tu_cs_emit_array(cs, val->color.uint32, 4);
break;
}
@@ -823,7 +860,6 @@ r3d_src_common(struct tu_cmd_buffer *cmd,
A6XX_TEX_SAMP_0_WRAP_R(A6XX_TEX_CLAMP_TO_EDGE) |
0x60000; /* XXX used by blob, doesn't seem necessary */
texture.map[A6XX_TEX_CONST_DWORDS + 1] =
- 0x1 | /* XXX used by blob, doesn't seem necessary */
A6XX_TEX_SAMP_1_UNNORM_COORDS |
A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR;
texture.map[A6XX_TEX_CONST_DWORDS + 2] = 0;
@@ -854,7 +890,7 @@ r3d_src_common(struct tu_cmd_buffer *cmd,
static void
r3d_src(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
- const struct tu_image_view *iview,
+ const struct fdl6_view *iview,
uint32_t layer,
VkFilter filter)
{
@@ -867,23 +903,23 @@ r3d_src(struct tu_cmd_buffer *cmd,
static void
r3d_src_buffer(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
- VkFormat vk_format,
+ enum pipe_format format,
uint64_t va, uint32_t pitch,
uint32_t width, uint32_t height)
{
uint32_t desc[A6XX_TEX_CONST_DWORDS];
- struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR);
+ struct tu_native_format fmt = tu6_format_texture(format, TILE6_LINEAR);
desc[0] =
- COND(vk_format_is_srgb(vk_format), A6XX_TEX_CONST_0_SRGB) |
- A6XX_TEX_CONST_0_FMT(format.fmt) |
- A6XX_TEX_CONST_0_SWAP(format.swap) |
+ COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) |
+ A6XX_TEX_CONST_0_FMT(fmt.fmt) |
+ A6XX_TEX_CONST_0_SWAP(fmt.swap) |
A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
// XXX to swizzle into .w for stencil buffer_to_image
- A6XX_TEX_CONST_0_SWIZ_Y(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Y) |
- A6XX_TEX_CONST_0_SWIZ_Z(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Z) |
- A6XX_TEX_CONST_0_SWIZ_W(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_W);
+ A6XX_TEX_CONST_0_SWIZ_Y(format == PIPE_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Y) |
+ A6XX_TEX_CONST_0_SWIZ_Z(format == PIPE_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Z) |
+ A6XX_TEX_CONST_0_SWIZ_W(format == PIPE_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_W);
desc[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
desc[2] =
A6XX_TEX_CONST_2_PITCH(pitch) |
@@ -901,16 +937,22 @@ static void
r3d_src_gmem(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
const struct tu_image_view *iview,
- VkFormat format,
+ enum pipe_format format,
uint32_t gmem_offset,
uint32_t cpp)
{
uint32_t desc[A6XX_TEX_CONST_DWORDS];
- memcpy(desc, iview->descriptor, sizeof(desc));
-
- /* patch the format so that depth/stencil get the right format */
- desc[0] &= ~A6XX_TEX_CONST_0_FMT__MASK;
- desc[0] |= A6XX_TEX_CONST_0_FMT(tu6_format_texture(format, TILE6_2).fmt);
+ memcpy(desc, iview->view.descriptor, sizeof(desc));
+
+ /* patch the format so that depth/stencil get the right format and swizzle */
+ desc[0] &= ~(A6XX_TEX_CONST_0_FMT__MASK |
+ A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK |
+ A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK);
+ desc[0] |= A6XX_TEX_CONST_0_FMT(tu6_format_texture(format, TILE6_2).fmt) |
+ A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
+ A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) |
+ A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) |
+ A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W);
/* patched for gmem */
desc[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
@@ -928,7 +970,7 @@ r3d_src_gmem(struct tu_cmd_buffer *cmd,
}
static void
-r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
+r3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer)
{
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
tu_cs_emit(cs, iview->RB_MRT_BUF_INFO);
@@ -938,10 +980,29 @@ r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3);
tu_cs_image_flag_ref(cs, iview, layer);
+ /* Use color format from RB_MRT_BUF_INFO. This register is relevant for
+ * FMT6_NV12_Y.
+ */
+ tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = iview->RB_MRT_BUF_INFO & 0xff));
+
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled));
}
static void
+r3d_dst_depth(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
+{
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
+ tu_cs_emit(cs, tu_image_view_depth(iview, RB_MRT_BUF_INFO));
+ tu_cs_image_depth_ref(cs, iview, layer);
+ tu_cs_emit(cs, 0);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3);
+ tu_cs_image_flag_ref(cs, &iview->view, layer);
+
+ tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->view.ubwc_enabled));
+}
+
+static void
r3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
{
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
@@ -953,12 +1014,12 @@ r3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t la
}
static void
-r3d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
+r3d_dst_buffer(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch)
{
- struct tu_native_format format = tu6_format_color(vk_format, TILE6_LINEAR);
+ struct tu_native_format fmt = tu6_format_color(format, TILE6_LINEAR);
tu_cs_emit_regs(cs,
- A6XX_RB_MRT_BUF_INFO(0, .color_format = format.fmt, .color_swap = format.swap),
+ A6XX_RB_MRT_BUF_INFO(0, .color_format = fmt.fmt, .color_swap = fmt.swap),
A6XX_RB_MRT_PITCH(0, pitch),
A6XX_RB_MRT_ARRAY_PITCH(0, 0),
A6XX_RB_MRT_BASE(0, .qword = va),
@@ -968,14 +1029,14 @@ r3d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch
}
static uint8_t
-aspect_write_mask(VkFormat vk_format, VkImageAspectFlags aspect_mask)
+aspect_write_mask(enum pipe_format format, VkImageAspectFlags aspect_mask)
{
uint8_t mask = 0xf;
assert(aspect_mask);
/* note: the only format with partial writing is D24S8,
* clear/blit uses the _AS_R8G8B8A8 format to access it
*/
- if (vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+ if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
mask = 0x7;
if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
@@ -987,18 +1048,18 @@ aspect_write_mask(VkFormat vk_format, VkImageAspectFlags aspect_mask)
static void
r3d_setup(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
- VkFormat vk_format,
+ enum pipe_format format,
VkImageAspectFlags aspect_mask,
unsigned blit_param,
bool clear,
bool ubwc,
VkSampleCountFlagBits samples)
{
- enum a6xx_format format = tu6_base_format(vk_format);
+ enum a6xx_format fmt = tu6_base_format(format);
- if ((vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
- vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) && ubwc) {
- format = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
+ if ((format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
+ format == PIPE_FORMAT_Z24X8_UNORM) && ubwc) {
+ fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
}
if (!cmd->state.pass) {
@@ -1036,14 +1097,14 @@ r3d_setup(struct tu_cmd_buffer *cmd,
tu_cs_emit_regs(cs, A6XX_SP_FS_RENDER_COMPONENTS(.rt0 = 0xf));
tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(0,
- .color_format = format,
- .color_sint = vk_format_is_sint(vk_format),
- .color_uint = vk_format_is_uint(vk_format)));
+ .color_format = fmt,
+ .color_sint = util_format_is_pure_sint(format),
+ .color_uint = util_format_is_pure_uint(format)));
tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0,
- .component_enable = aspect_write_mask(vk_format, aspect_mask)));
- tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(vk_format_is_srgb(vk_format)));
- tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(vk_format_is_srgb(vk_format)));
+ .component_enable = aspect_write_mask(format, aspect_mask)));
+ tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(util_format_is_srgb(format)));
+ tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(util_format_is_srgb(format)));
tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0));
tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0));
@@ -1084,22 +1145,24 @@ struct blit_ops {
const VkOffset2D *dst,
const VkOffset2D *src,
const VkExtent2D *extent);
- void (*clear_value)(struct tu_cs *cs, VkFormat format, const VkClearValue *val);
+ void (*clear_value)(struct tu_cs *cs, enum pipe_format format, const VkClearValue *val);
void (*src)(
struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
- const struct tu_image_view *iview,
+ const struct fdl6_view *iview,
uint32_t layer,
VkFilter filter);
void (*src_buffer)(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
- VkFormat vk_format,
+ enum pipe_format format,
uint64_t va, uint32_t pitch,
uint32_t width, uint32_t height);
- void (*dst)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
- void (*dst_buffer)(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch);
+ void (*dst)(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer);
+ void (*dst_depth)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
+ void (*dst_stencil)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
+ void (*dst_buffer)(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch);
void (*setup)(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
- VkFormat vk_format,
+ enum pipe_format format,
VkImageAspectFlags aspect_mask,
unsigned blit_param, /* CmdBlitImage: rotation in 2D path and z scaling in 3D path */
bool clear,
@@ -1116,6 +1179,8 @@ static const struct blit_ops r2d_ops = {
.src = r2d_src,
.src_buffer = r2d_src_buffer,
.dst = r2d_dst,
+ .dst_depth = r2d_dst_depth,
+ .dst_stencil = r2d_dst_stencil,
.dst_buffer = r2d_dst_buffer,
.setup = r2d_setup,
.run = r2d_run,
@@ -1128,6 +1193,8 @@ static const struct blit_ops r3d_ops = {
.src = r3d_src,
.src_buffer = r3d_src_buffer,
.dst = r3d_dst,
+ .dst_depth = r3d_dst_depth,
+ .dst_stencil = r3d_dst_stencil,
.dst_buffer = r3d_dst_buffer,
.setup = r3d_setup,
.run = r3d_run,
@@ -1150,76 +1217,53 @@ coords(const struct blit_ops *ops,
* compression behavior, so no just returning R8_UINT/R16_UINT/R32_UINT for
* everything.
*/
-static VkFormat
-copy_format(VkFormat format, VkImageAspectFlags aspect_mask, bool copy_buffer)
-{
- if (vk_format_is_compressed(format)) {
- switch (vk_format_get_blocksize(format)) {
- case 1: return VK_FORMAT_R8_UINT;
- case 2: return VK_FORMAT_R16_UINT;
- case 4: return VK_FORMAT_R32_UINT;
- case 8: return VK_FORMAT_R32G32_UINT;
- case 16:return VK_FORMAT_R32G32B32A32_UINT;
+static enum pipe_format
+copy_format(VkFormat vk_format, VkImageAspectFlags aspect_mask, bool copy_buffer)
+{
+ if (vk_format_is_compressed(vk_format)) {
+ switch (vk_format_get_blocksize(vk_format)) {
+ case 1: return PIPE_FORMAT_R8_UINT;
+ case 2: return PIPE_FORMAT_R16_UINT;
+ case 4: return PIPE_FORMAT_R32_UINT;
+ case 8: return PIPE_FORMAT_R32G32_UINT;
+ case 16:return PIPE_FORMAT_R32G32B32A32_UINT;
default:
unreachable("unhandled format size");
}
}
- switch (format) {
+ enum pipe_format format = tu_vk_format_to_pipe_format(vk_format);
+
/* For SNORM formats, copy them as the equivalent UNORM format. If we treat
* them as snorm then the 0x80 (-1.0 snorm8) value will get clamped to 0x81
* (also -1.0), when we're supposed to be memcpying the bits. See
* https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2917 for discussion.
*/
- case VK_FORMAT_R8_SNORM:
- return VK_FORMAT_R8_UNORM;
- case VK_FORMAT_R8G8_SNORM:
- return VK_FORMAT_R8G8_UNORM;
- case VK_FORMAT_R8G8B8_SNORM:
- return VK_FORMAT_R8G8B8_UNORM;
- case VK_FORMAT_B8G8R8_SNORM:
- return VK_FORMAT_B8G8R8_UNORM;
- case VK_FORMAT_R8G8B8A8_SNORM:
- return VK_FORMAT_R8G8B8A8_UNORM;
- case VK_FORMAT_B8G8R8A8_SNORM:
- return VK_FORMAT_B8G8R8A8_UNORM;
- case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
- return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
- case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
- return VK_FORMAT_A2R10G10B10_UNORM_PACK32;
- case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
- return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
- case VK_FORMAT_R16_SNORM:
- return VK_FORMAT_R16_UNORM;
- case VK_FORMAT_R16G16_SNORM:
- return VK_FORMAT_R16G16_UNORM;
- case VK_FORMAT_R16G16B16_SNORM:
- return VK_FORMAT_R16G16B16_UNORM;
- case VK_FORMAT_R16G16B16A16_SNORM:
- return VK_FORMAT_R16G16B16A16_UNORM;
-
- case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
- return VK_FORMAT_R32_UINT;
-
- case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
+ format = util_format_snorm_to_unorm(format);
+
+ switch (format) {
+ case PIPE_FORMAT_R9G9B9E5_FLOAT:
+ return PIPE_FORMAT_R32_UINT;
+
+ case PIPE_FORMAT_G8_B8R8_420_UNORM:
if (aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT)
- return VK_FORMAT_R8G8_UNORM;
+ return PIPE_FORMAT_R8G8_UNORM;
else
- return VK_FORMAT_R8_UNORM;
- case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
- return VK_FORMAT_R8_UNORM;
+ return PIPE_FORMAT_Y8_UNORM;
+ case PIPE_FORMAT_G8_B8_R8_420_UNORM:
+ return PIPE_FORMAT_R8_UNORM;
- case VK_FORMAT_D24_UNORM_S8_UINT:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT && copy_buffer)
- return VK_FORMAT_R8_UNORM;
+ return PIPE_FORMAT_R8_UNORM;
else
return format;
- case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
- return VK_FORMAT_S8_UINT;
+ return PIPE_FORMAT_S8_UINT;
assert(aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT);
- return VK_FORMAT_D32_SFLOAT;
+ return PIPE_FORMAT_Z32_FLOAT;
default:
return format;
@@ -1234,11 +1278,11 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd,
{
const struct blit_ops *ops = &r2d_ops;
- ops->setup(cmd, cs, VK_FORMAT_D16_UNORM, VK_IMAGE_ASPECT_DEPTH_BIT, 0, true, false,
+ ops->setup(cmd, cs, PIPE_FORMAT_Z16_UNORM, VK_IMAGE_ASPECT_DEPTH_BIT, 0, true, false,
VK_SAMPLE_COUNT_1_BIT);
- ops->clear_value(cs, VK_FORMAT_D16_UNORM, value);
- ops->dst_buffer(cs, VK_FORMAT_D16_UNORM,
- image->bo->iova + image->bo_offset + image->lrz_offset,
+ ops->clear_value(cs, PIPE_FORMAT_Z16_UNORM, value);
+ ops->dst_buffer(cs, PIPE_FORMAT_Z16_UNORM,
+ image->iova + image->lrz_offset,
image->lrz_pitch * 2);
ops->coords(cs, &(VkOffset2D) {}, NULL, &(VkExtent2D) {image->lrz_pitch, image->lrz_height});
ops->run(cmd, cs);
@@ -1246,9 +1290,9 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd,
}
static void
-tu_image_view_copy_blit(struct tu_image_view *iview,
+tu_image_view_copy_blit(struct fdl6_view *iview,
struct tu_image *image,
- VkFormat format,
+ enum pipe_format format,
const VkImageSubresourceLayers *subres,
uint32_t layer,
bool stencil_read,
@@ -1257,53 +1301,58 @@ tu_image_view_copy_blit(struct tu_image_view *iview,
VkImageAspectFlags aspect_mask = subres->aspectMask;
/* always use the AS_R8G8B8A8 format for these */
- if (format == VK_FORMAT_D24_UNORM_S8_UINT ||
- format == VK_FORMAT_X8_D24_UNORM_PACK32) {
+ if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
+ format == PIPE_FORMAT_Z24X8_UNORM) {
aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
}
- tu_image_view_init(iview, &(VkImageViewCreateInfo) {
- .image = tu_image_to_handle(image),
- .viewType = z_scale ? VK_IMAGE_VIEW_TYPE_3D : VK_IMAGE_VIEW_TYPE_2D,
- .format = format,
- /* image_to_buffer from d24s8 with stencil aspect mask writes out to r8 */
- .components.r = stencil_read ? VK_COMPONENT_SWIZZLE_A : VK_COMPONENT_SWIZZLE_R,
- .subresourceRange = {
- .aspectMask = aspect_mask,
- .baseMipLevel = subres->mipLevel,
- .levelCount = 1,
- .baseArrayLayer = subres->baseArrayLayer + layer,
- .layerCount = 1,
+ const struct fdl_layout *layout =
+ &image->layout[tu6_plane_index(image->vk_format, aspect_mask)];
+
+ fdl6_view_init(iview, &layout, &(struct fdl_view_args) {
+ .iova = image->iova,
+ .base_array_layer = subres->baseArrayLayer + layer,
+ .layer_count = 1,
+ .base_miplevel = subres->mipLevel,
+ .level_count = 1,
+ .format = tu_format_for_aspect(format, aspect_mask),
+ .swiz = {
+ /* image_to_buffer from d24s8 with stencil aspect mask writes out to r8 */
+ stencil_read ? PIPE_SWIZZLE_W : PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W
},
+ .type = z_scale ? FDL_VIEW_TYPE_3D : FDL_VIEW_TYPE_2D,
}, false);
}
static void
-tu_image_view_copy(struct tu_image_view *iview,
+tu_image_view_copy(struct fdl6_view *iview,
struct tu_image *image,
- VkFormat format,
+ enum pipe_format format,
const VkImageSubresourceLayers *subres,
uint32_t layer,
bool stencil_read)
{
- format = copy_format(format, subres->aspectMask, false);
tu_image_view_copy_blit(iview, image, format, subres, layer, stencil_read, false);
}
static void
-tu_image_view_blit(struct tu_image_view *iview,
+tu_image_view_blit(struct fdl6_view *iview,
struct tu_image *image,
const VkImageSubresourceLayers *subres,
uint32_t layer)
{
- tu_image_view_copy_blit(iview, image, image->vk_format, subres, layer, false, false);
+ enum pipe_format format =
+ tu6_plane_format(image->vk_format, tu6_plane_index(image->vk_format,
+ subres->aspectMask));
+ tu_image_view_copy_blit(iview, image, format, subres, layer, false, false);
}
static void
tu6_blit_image(struct tu_cmd_buffer *cmd,
struct tu_image *src_image,
struct tu_image *dst_image,
- const VkImageBlit *info,
+ const VkImageBlit2KHR *info,
VkFilter filter)
{
const struct blit_ops *ops = &r2d_ops;
@@ -1375,7 +1424,7 @@ tu6_blit_image(struct tu_cmd_buffer *cmd,
trace_start_blit(&cmd->trace, cs);
- ops->setup(cmd, cs, format, info->dstSubresource.aspectMask,
+ ops->setup(cmd, cs, tu_vk_format_to_pipe_format(format), info->dstSubresource.aspectMask,
blit_param, false, dst_image->layout[0].ubwc,
dst_image->layout[0].nr_samples);
@@ -1399,12 +1448,16 @@ tu6_blit_image(struct tu_cmd_buffer *cmd,
A6XX_GRAS_2D_SRC_BR_Y(MAX2(info->srcOffsets[0].y, info->srcOffsets[1].y) - 1));
}
- struct tu_image_view dst, src;
+ struct fdl6_view dst, src;
tu_image_view_blit(&dst, dst_image, &info->dstSubresource,
MIN2(info->dstOffsets[0].z, info->dstOffsets[1].z));
if (z_scale) {
- tu_image_view_copy_blit(&src, src_image, src_image->vk_format,
+ enum pipe_format src_format =
+ tu6_plane_format(src_image->vk_format,
+ tu6_plane_index(src_image->vk_format,
+ info->srcSubresource.aspectMask));
+ tu_image_view_copy_blit(&src, src_image, src_format,
&info->srcSubresource, 0, false, true);
ops->src(cmd, cs, &src, 0, filter);
} else {
@@ -1432,35 +1485,30 @@ tu6_blit_image(struct tu_cmd_buffer *cmd,
}
VKAPI_ATTR void VKAPI_CALL
-tu_CmdBlitImage(VkCommandBuffer commandBuffer,
- VkImage srcImage,
- VkImageLayout srcImageLayout,
- VkImage dstImage,
- VkImageLayout dstImageLayout,
- uint32_t regionCount,
- const VkImageBlit *pRegions,
- VkFilter filter)
+tu_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,
+ const VkBlitImageInfo2KHR* pBlitImageInfo)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
- TU_FROM_HANDLE(tu_image, src_image, srcImage);
- TU_FROM_HANDLE(tu_image, dst_image, dstImage);
+ TU_FROM_HANDLE(tu_image, src_image, pBlitImageInfo->srcImage);
+ TU_FROM_HANDLE(tu_image, dst_image, pBlitImageInfo->dstImage);
- for (uint32_t i = 0; i < regionCount; ++i) {
+ for (uint32_t i = 0; i < pBlitImageInfo->regionCount; ++i) {
/* can't blit both depth and stencil at once with D32_S8
* TODO: more advanced 3D blit path to support it instead?
*/
if (src_image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
dst_image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
- VkImageBlit region = pRegions[i];
- u_foreach_bit(b, pRegions[i].dstSubresource.aspectMask) {
+ VkImageBlit2KHR region = pBlitImageInfo->pRegions[i];
+ u_foreach_bit(b, region.dstSubresource.aspectMask) {
region.srcSubresource.aspectMask = BIT(b);
region.dstSubresource.aspectMask = BIT(b);
- tu6_blit_image(cmd, src_image, dst_image, &region, filter);
+ tu6_blit_image(cmd, src_image, dst_image, &region, pBlitImageInfo->filter);
}
continue;
}
- tu6_blit_image(cmd, src_image, dst_image, pRegions + i, filter);
+ tu6_blit_image(cmd, src_image, dst_image, pBlitImageInfo->pRegions + i,
+ pBlitImageInfo->filter);
}
}
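The hunk above switches the blit entry point to the VK_KHR_copy_commands2 form. As an application-side illustration of the mapping (not driver code; the function and its parameters are assumed locals), the legacy vkCmdBlitImage arguments wrap into the new structures roughly like this:

#include <vulkan/vulkan.h>

/* Sketch: forwarding one legacy vkCmdBlitImage region through the
 * VK_KHR_copy_commands2 structures used by the new entry point. */
static void
blit_one_region_2khr(VkCommandBuffer cmd,
                     VkImage src, VkImageLayout src_layout,
                     VkImage dst, VkImageLayout dst_layout,
                     const VkImageBlit *legacy, VkFilter filter)
{
   VkImageBlit2KHR region = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
      .srcSubresource = legacy->srcSubresource,
      .srcOffsets = { legacy->srcOffsets[0], legacy->srcOffsets[1] },
      .dstSubresource = legacy->dstSubresource,
      .dstOffsets = { legacy->dstOffsets[0], legacy->dstOffsets[1] },
   };
   VkBlitImageInfo2KHR info = {
      .sType = VK_STRUCTURE_TYPE_BLIT_IMAGE_INFO_2_KHR,
      .srcImage = src, .srcImageLayout = src_layout,
      .dstImage = dst, .dstImageLayout = dst_layout,
      .regionCount = 1, .pRegions = &region,
      .filter = filter,
   };
   vkCmdBlitImage2KHR(cmd, &info);
}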
@@ -1494,12 +1542,14 @@ static void
tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd,
struct tu_buffer *src_buffer,
struct tu_image *dst_image,
- const VkBufferImageCopy *info)
+ const VkBufferImageCopy2KHR *info)
{
struct tu_cs *cs = &cmd->cs;
uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
- VkFormat src_format =
+ enum pipe_format src_format =
copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, true);
+ enum pipe_format dst_format =
+ copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, false);
const struct blit_ops *ops = &r2d_ops;
/* special case for buffer to stencil */
@@ -1508,9 +1558,9 @@ tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd,
ops = &r3d_ops;
}
- /* TODO: G8_B8R8_2PLANE_420_UNORM Y plane has different hardware format,
- * which matters for UBWC. buffer_to_image/etc can fail because of this
- */
+ /* note: could use "R8_UNORM" when no UBWC */
+ if (src_format == PIPE_FORMAT_Y8_UNORM)
+ ops = &r3d_ops;
VkOffset3D offset = info->imageOffset;
VkExtent3D extent = info->imageExtent;
@@ -1519,24 +1569,23 @@ tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd,
copy_compressed(dst_image->vk_format, &offset, &extent, &src_width, &src_height);
- uint32_t pitch = src_width * vk_format_get_blocksize(src_format);
+ uint32_t pitch = src_width * util_format_get_blocksize(src_format);
uint32_t layer_size = src_height * pitch;
- ops->setup(cmd, cs,
- copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, false),
+ ops->setup(cmd, cs, dst_format,
info->imageSubresource.aspectMask, 0, false, dst_image->layout[0].ubwc,
dst_image->layout[0].nr_samples);
- struct tu_image_view dst;
- tu_image_view_copy(&dst, dst_image, dst_image->vk_format, &info->imageSubresource, offset.z, false);
+ struct fdl6_view dst;
+ tu_image_view_copy(&dst, dst_image, dst_format, &info->imageSubresource, offset.z, false);
for (uint32_t i = 0; i < layers; i++) {
ops->dst(cs, &dst, i);
- uint64_t src_va = tu_buffer_iova(src_buffer) + info->bufferOffset + layer_size * i;
+ uint64_t src_va = src_buffer->iova + info->bufferOffset + layer_size * i;
if ((src_va & 63) || (pitch & 63)) {
for (uint32_t y = 0; y < extent.height; y++) {
- uint32_t x = (src_va & 63) / vk_format_get_blocksize(src_format);
+ uint32_t x = (src_va & 63) / util_format_get_blocksize(src_format);
ops->src_buffer(cmd, cs, src_format, src_va & ~63, pitch,
x + extent.width, 1);
ops->coords(cs, &(VkOffset2D){offset.x, offset.y + y}, &(VkOffset2D){x},
@@ -1555,39 +1604,43 @@ tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd,
}
VKAPI_ATTR void VKAPI_CALL
-tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
- VkBuffer srcBuffer,
- VkImage dstImage,
- VkImageLayout dstImageLayout,
- uint32_t regionCount,
- const VkBufferImageCopy *pRegions)
+tu_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyBufferToImageInfo2KHR *pCopyBufferToImageInfo)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
- TU_FROM_HANDLE(tu_image, dst_image, dstImage);
- TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
+ TU_FROM_HANDLE(tu_image, dst_image, pCopyBufferToImageInfo->dstImage);
+ TU_FROM_HANDLE(tu_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
- for (unsigned i = 0; i < regionCount; ++i)
- tu_copy_buffer_to_image(cmd, src_buffer, dst_image, pRegions + i);
+ for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; ++i)
+ tu_copy_buffer_to_image(cmd, src_buffer, dst_image,
+ pCopyBufferToImageInfo->pRegions + i);
}
static void
tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd,
struct tu_image *src_image,
struct tu_buffer *dst_buffer,
- const VkBufferImageCopy *info)
+ const VkBufferImageCopy2KHR *info)
{
struct tu_cs *cs = &cmd->cs;
uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
- VkFormat dst_format =
+ enum pipe_format dst_format =
copy_format(src_image->vk_format, info->imageSubresource.aspectMask, true);
+ enum pipe_format src_format =
+ copy_format(src_image->vk_format, info->imageSubresource.aspectMask, false);
+ const struct blit_ops *ops = &r2d_ops;
bool stencil_read = false;
if (src_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
+ ops = &r3d_ops;
stencil_read = true;
}
- const struct blit_ops *ops = stencil_read ? &r3d_ops : &r2d_ops;
+ /* note: could use "R8_UNORM" when no UBWC */
+ if (dst_format == PIPE_FORMAT_Y8_UNORM)
+ ops = &r3d_ops;
+
VkOffset3D offset = info->imageOffset;
VkExtent3D extent = info->imageExtent;
uint32_t dst_width = info->bufferRowLength ?: extent.width;
@@ -1595,22 +1648,22 @@ tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd,
copy_compressed(src_image->vk_format, &offset, &extent, &dst_width, &dst_height);
- uint32_t pitch = dst_width * vk_format_get_blocksize(dst_format);
+ uint32_t pitch = dst_width * util_format_get_blocksize(dst_format);
uint32_t layer_size = pitch * dst_height;
ops->setup(cmd, cs, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false,
VK_SAMPLE_COUNT_1_BIT);
- struct tu_image_view src;
- tu_image_view_copy(&src, src_image, src_image->vk_format, &info->imageSubresource, offset.z, stencil_read);
+ struct fdl6_view src;
+ tu_image_view_copy(&src, src_image, src_format, &info->imageSubresource, offset.z, stencil_read);
for (uint32_t i = 0; i < layers; i++) {
ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST);
- uint64_t dst_va = tu_buffer_iova(dst_buffer) + info->bufferOffset + layer_size * i;
+ uint64_t dst_va = dst_buffer->iova + info->bufferOffset + layer_size * i;
if ((dst_va & 63) || (pitch & 63)) {
for (uint32_t y = 0; y < extent.height; y++) {
- uint32_t x = (dst_va & 63) / vk_format_get_blocksize(dst_format);
+ uint32_t x = (dst_va & 63) / util_format_get_blocksize(dst_format);
ops->dst_buffer(cs, dst_format, dst_va & ~63, 0);
ops->coords(cs, &(VkOffset2D) {x}, &(VkOffset2D){offset.x, offset.y + y},
&(VkExtent2D) {extent.width, 1});
@@ -1628,19 +1681,16 @@ tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd,
}
VKAPI_ATTR void VKAPI_CALL
-tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
- VkImage srcImage,
- VkImageLayout srcImageLayout,
- VkBuffer dstBuffer,
- uint32_t regionCount,
- const VkBufferImageCopy *pRegions)
+tu_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyImageToBufferInfo2KHR* pCopyImageToBufferInfo)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
- TU_FROM_HANDLE(tu_image, src_image, srcImage);
- TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer);
+ TU_FROM_HANDLE(tu_image, src_image, pCopyImageToBufferInfo->srcImage);
+ TU_FROM_HANDLE(tu_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);
- for (unsigned i = 0; i < regionCount; ++i)
- tu_copy_image_to_buffer(cmd, src_image, dst_buffer, pRegions + i);
+ for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; ++i)
+ tu_copy_image_to_buffer(cmd, src_image, dst_buffer,
+ pCopyImageToBufferInfo->pRegions + i);
}
/* Tiled formats don't support swapping, which means that we can't support
@@ -1654,7 +1704,7 @@ tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
*/
static bool
-is_swapped_format(VkFormat format)
+is_swapped_format(enum pipe_format format)
{
struct tu_native_format linear = tu6_format_texture(format, TILE6_LINEAR);
struct tu_native_format tiled = tu6_format_texture(format, TILE6_3);
@@ -1676,7 +1726,7 @@ static void
tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
struct tu_image *src_image,
struct tu_image *dst_image,
- const VkImageCopy *info)
+ const VkImageCopy2KHR *info)
{
const struct blit_ops *ops = &r2d_ops;
struct tu_cs *cs = &cmd->cs;
@@ -1684,7 +1734,7 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
if (dst_image->layout[0].nr_samples > 1)
ops = &r3d_ops;
- VkFormat format = VK_FORMAT_UNDEFINED;
+ enum pipe_format format = PIPE_FORMAT_NONE;
VkOffset3D src_offset = info->srcOffset;
VkOffset3D dst_offset = info->dstOffset;
VkExtent3D extent = info->extent;
@@ -1709,8 +1759,13 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
copy_compressed(src_image->vk_format, &src_offset, &extent, NULL, NULL);
copy_compressed(dst_image->vk_format, &dst_offset, NULL, NULL, NULL);
- VkFormat dst_format = copy_format(dst_image->vk_format, info->dstSubresource.aspectMask, false);
- VkFormat src_format = copy_format(src_image->vk_format, info->srcSubresource.aspectMask, false);
+ enum pipe_format dst_format = copy_format(dst_image->vk_format, info->dstSubresource.aspectMask, false);
+ enum pipe_format src_format = copy_format(src_image->vk_format, info->srcSubresource.aspectMask, false);
+
+ /* note: could use "R8_UNORM" when no UBWC */
+ if (dst_format == PIPE_FORMAT_Y8_UNORM ||
+ src_format == PIPE_FORMAT_Y8_UNORM)
+ ops = &r3d_ops;
bool use_staging_blit = false;
@@ -1748,54 +1803,50 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
use_staging_blit = true;
}
- struct tu_image_view dst, src;
+ struct fdl6_view dst, src;
if (use_staging_blit) {
tu_image_view_copy(&dst, dst_image, dst_format, &info->dstSubresource, dst_offset.z, false);
tu_image_view_copy(&src, src_image, src_format, &info->srcSubresource, src_offset.z, false);
- struct tu_image staging_image = {
- .base.type = VK_OBJECT_TYPE_IMAGE,
- .vk_format = src_format,
- .level_count = 1,
- .layer_count = info->srcSubresource.layerCount,
- .bo_offset = 0,
- };
-
- VkImageSubresourceLayers staging_subresource = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .mipLevel = 0,
- .baseArrayLayer = 0,
- .layerCount = info->srcSubresource.layerCount,
- };
-
+ struct fdl_layout staging_layout = { 0 };
VkOffset3D staging_offset = { 0 };
- staging_image.layout[0].tile_mode = TILE6_LINEAR;
- staging_image.layout[0].ubwc = false;
+ staging_layout.tile_mode = TILE6_LINEAR;
+ staging_layout.ubwc = false;
- fdl6_layout(&staging_image.layout[0],
- vk_format_to_pipe_format(staging_image.vk_format),
+ fdl6_layout(&staging_layout,
+ src_format,
src_image->layout[0].nr_samples,
extent.width,
extent.height,
extent.depth,
- staging_image.level_count,
- staging_image.layer_count,
+ 1,
+ info->srcSubresource.layerCount,
extent.depth > 1,
NULL);
+ struct tu_bo *staging_bo;
VkResult result = tu_get_scratch_bo(cmd->device,
- staging_image.layout[0].size,
- &staging_image.bo);
+ staging_layout.size,
+ &staging_bo);
if (result != VK_SUCCESS) {
cmd->record_result = result;
return;
}
- struct tu_image_view staging;
- tu_image_view_copy(&staging, &staging_image, src_format,
- &staging_subresource, 0, false);
+ struct fdl6_view staging;
+ const struct fdl_layout *staging_layout_ptr = &staging_layout;
+ fdl6_view_init(&staging, &staging_layout_ptr, &(struct fdl_view_args) {
+ .iova = staging_bo->iova,
+ .base_array_layer = 0,
+ .layer_count = 1,
+ .base_miplevel = 0,
+ .level_count = info->srcSubresource.layerCount,
+ .format = tu_format_for_aspect(src_format, VK_IMAGE_ASPECT_COLOR_BIT),
+ .swiz = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
+ .type = FDL_VIEW_TYPE_2D,
+ }, false);
ops->setup(cmd, cs, src_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false,
dst_image->layout[0].nr_samples);
@@ -1814,8 +1865,16 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
tu_cs_emit_wfi(cs);
- tu_image_view_copy(&staging, &staging_image, dst_format,
- &staging_subresource, 0, false);
+ fdl6_view_init(&staging, &staging_layout_ptr, &(struct fdl_view_args) {
+ .iova = staging_bo->iova,
+ .base_array_layer = 0,
+ .layer_count = 1,
+ .base_miplevel = 0,
+ .level_count = info->srcSubresource.layerCount,
+ .format = tu_format_for_aspect(dst_format, VK_IMAGE_ASPECT_COLOR_BIT),
+ .swiz = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
+ .type = FDL_VIEW_TYPE_2D,
+ }, false);
ops->setup(cmd, cs, dst_format, info->dstSubresource.aspectMask,
0, false, dst_image->layout[0].ubwc,
@@ -1847,22 +1906,17 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
}
VKAPI_ATTR void VKAPI_CALL
-tu_CmdCopyImage(VkCommandBuffer commandBuffer,
- VkImage srcImage,
- VkImageLayout srcImageLayout,
- VkImage destImage,
- VkImageLayout destImageLayout,
- uint32_t regionCount,
- const VkImageCopy *pRegions)
+tu_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyImageInfo2KHR* pCopyImageInfo)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
- TU_FROM_HANDLE(tu_image, src_image, srcImage);
- TU_FROM_HANDLE(tu_image, dst_image, destImage);
+ TU_FROM_HANDLE(tu_image, src_image, pCopyImageInfo->srcImage);
+ TU_FROM_HANDLE(tu_image, dst_image, pCopyImageInfo->dstImage);
- for (uint32_t i = 0; i < regionCount; ++i) {
+ for (uint32_t i = 0; i < pCopyImageInfo->regionCount; ++i) {
if (src_image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
- VkImageCopy info = pRegions[i];
- u_foreach_bit(b, pRegions[i].dstSubresource.aspectMask) {
+ VkImageCopy2KHR info = pCopyImageInfo->pRegions[i];
+ u_foreach_bit(b, info.dstSubresource.aspectMask) {
info.srcSubresource.aspectMask = BIT(b);
info.dstSubresource.aspectMask = BIT(b);
tu_copy_image_to_image(cmd, src_image, dst_image, &info);
@@ -1870,7 +1924,8 @@ tu_CmdCopyImage(VkCommandBuffer commandBuffer,
continue;
}
- tu_copy_image_to_image(cmd, src_image, dst_image, pRegions + i);
+ tu_copy_image_to_image(cmd, src_image, dst_image,
+ pCopyImageInfo->pRegions + i);
}
}
@@ -1883,7 +1938,7 @@ copy_buffer(struct tu_cmd_buffer *cmd,
{
const struct blit_ops *ops = &r2d_ops;
struct tu_cs *cs = &cmd->cs;
- VkFormat format = block_size == 4 ? VK_FORMAT_R32_UINT : VK_FORMAT_R8_UNORM;
+ enum pipe_format format = block_size == 4 ? PIPE_FORMAT_R32_UINT : PIPE_FORMAT_R8_UNORM;
uint64_t blocks = size / block_size;
ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false,
@@ -1908,21 +1963,19 @@ copy_buffer(struct tu_cmd_buffer *cmd,
}
VKAPI_ATTR void VKAPI_CALL
-tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
- VkBuffer srcBuffer,
- VkBuffer dstBuffer,
- uint32_t regionCount,
- const VkBufferCopy *pRegions)
+tu_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyBufferInfo2KHR *pCopyBufferInfo)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
- TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
- TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer);
+ TU_FROM_HANDLE(tu_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
+ TU_FROM_HANDLE(tu_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
- for (unsigned i = 0; i < regionCount; ++i) {
+ for (unsigned i = 0; i < pCopyBufferInfo->regionCount; ++i) {
+ const VkBufferCopy2KHR *region = &pCopyBufferInfo->pRegions[i];
copy_buffer(cmd,
- tu_buffer_iova(dst_buffer) + pRegions[i].dstOffset,
- tu_buffer_iova(src_buffer) + pRegions[i].srcOffset,
- pRegions[i].size, 1);
+ dst_buffer->iova + region->dstOffset,
+ src_buffer->iova + region->srcOffset,
+ region->size, 1);
}
}
@@ -1944,7 +1997,7 @@ tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
}
memcpy(tmp.map, pData, dataSize);
- copy_buffer(cmd, tu_buffer_iova(buffer) + dstOffset, tmp.iova, dataSize, 4);
+ copy_buffer(cmd, buffer->iova + dstOffset, tmp.iova, dataSize, 4);
}
VKAPI_ATTR void VKAPI_CALL
@@ -1962,18 +2015,18 @@ tu_CmdFillBuffer(VkCommandBuffer commandBuffer,
if (fillSize == VK_WHOLE_SIZE)
fillSize = buffer->size - dstOffset;
- uint64_t dst_va = tu_buffer_iova(buffer) + dstOffset;
+ uint64_t dst_va = buffer->iova + dstOffset;
uint32_t blocks = fillSize / 4;
- ops->setup(cmd, cs, VK_FORMAT_R32_UINT, VK_IMAGE_ASPECT_COLOR_BIT, 0, true, false,
+ ops->setup(cmd, cs, PIPE_FORMAT_R32_UINT, VK_IMAGE_ASPECT_COLOR_BIT, 0, true, false,
VK_SAMPLE_COUNT_1_BIT);
- ops->clear_value(cs, VK_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}});
+ ops->clear_value(cs, PIPE_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}});
while (blocks) {
uint32_t dst_x = (dst_va & 63) / 4;
uint32_t width = MIN2(blocks, 0x4000 - dst_x);
- ops->dst_buffer(cs, VK_FORMAT_R32_UINT, dst_va & ~63, 0);
+ ops->dst_buffer(cs, PIPE_FORMAT_R32_UINT, dst_va & ~63, 0);
ops->coords(cs, &(VkOffset2D) {dst_x}, NULL, &(VkExtent2D) {width, 1});
ops->run(cmd, cs);
@@ -1985,25 +2038,21 @@ tu_CmdFillBuffer(VkCommandBuffer commandBuffer,
}
VKAPI_ATTR void VKAPI_CALL
-tu_CmdResolveImage(VkCommandBuffer commandBuffer,
- VkImage srcImage,
- VkImageLayout srcImageLayout,
- VkImage dstImage,
- VkImageLayout dstImageLayout,
- uint32_t regionCount,
- const VkImageResolve *pRegions)
+tu_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
+ const VkResolveImageInfo2KHR* pResolveImageInfo)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
- TU_FROM_HANDLE(tu_image, src_image, srcImage);
- TU_FROM_HANDLE(tu_image, dst_image, dstImage);
+ TU_FROM_HANDLE(tu_image, src_image, pResolveImageInfo->srcImage);
+ TU_FROM_HANDLE(tu_image, dst_image, pResolveImageInfo->dstImage);
const struct blit_ops *ops = &r2d_ops;
struct tu_cs *cs = &cmd->cs;
- ops->setup(cmd, cs, dst_image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT,
- 0, false, dst_image->layout[0].ubwc, VK_SAMPLE_COUNT_1_BIT);
+ ops->setup(cmd, cs, tu_vk_format_to_pipe_format(dst_image->vk_format),
+ VK_IMAGE_ASPECT_COLOR_BIT, 0, false, dst_image->layout[0].ubwc,
+ VK_SAMPLE_COUNT_1_BIT);
- for (uint32_t i = 0; i < regionCount; ++i) {
- const VkImageResolve *info = &pRegions[i];
+ for (uint32_t i = 0; i < pResolveImageInfo->regionCount; ++i) {
+ const VkImageResolve2KHR *info = &pResolveImageInfo->pRegions[i];
uint32_t layers = MAX2(info->extent.depth, info->dstSubresource.layerCount);
assert(info->srcSubresource.layerCount == info->dstSubresource.layerCount);
@@ -2011,7 +2060,7 @@ tu_CmdResolveImage(VkCommandBuffer commandBuffer,
coords(ops, cs, &info->dstOffset, &info->srcOffset, &info->extent);
- struct tu_image_view dst, src;
+ struct fdl6_view dst, src;
tu_image_view_blit(&dst, dst_image, &info->dstSubresource, info->dstOffset.z);
tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffset.z);
@@ -2040,23 +2089,29 @@ resolve_sysmem(struct tu_cmd_buffer *cmd,
uint32_t layer_mask,
uint32_t layers,
const VkRect2D *rect,
- bool separate_stencil)
+ bool separate_ds)
{
const struct blit_ops *ops = &r2d_ops;
trace_start_sysmem_resolve(&cmd->trace, cs);
- ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT,
- 0, false, dst->ubwc_enabled, VK_SAMPLE_COUNT_1_BIT);
+ ops->setup(cmd, cs, tu_vk_format_to_pipe_format(format),
+ VK_IMAGE_ASPECT_COLOR_BIT, 0, false, dst->view.ubwc_enabled,
+ VK_SAMPLE_COUNT_1_BIT);
ops->coords(cs, &rect->offset, &rect->offset, &rect->extent);
for_each_layer(i, layer_mask, layers) {
- if (separate_stencil) {
- r2d_src_stencil(cmd, cs, src, i, VK_FILTER_NEAREST);
- r2d_dst_stencil(cs, dst, i);
+ if (separate_ds) {
+ if (format == VK_FORMAT_D32_SFLOAT) {
+ r2d_src_depth(cmd, cs, src, i, VK_FILTER_NEAREST);
+ ops->dst_depth(cs, dst, i);
+ } else {
+ r2d_src_stencil(cmd, cs, src, i, VK_FILTER_NEAREST);
+ ops->dst_stencil(cs, dst, i);
+ }
} else {
- ops->src(cmd, cs, src, i, VK_FILTER_NEAREST);
- ops->dst(cs, dst, i);
+ ops->src(cmd, cs, &src->view, i, VK_FILTER_NEAREST);
+ ops->dst(cs, &dst->view, i);
}
ops->run(cmd, cs);
}
@@ -2079,7 +2134,7 @@ tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
if (dst->image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
resolve_sysmem(cmd, cs, VK_FORMAT_D32_SFLOAT,
- src, dst, layer_mask, layers, rect, false);
+ src, dst, layer_mask, layers, rect, true);
resolve_sysmem(cmd, cs, VK_FORMAT_S8_UINT,
src, dst, layer_mask, layers, rect, true);
} else {
@@ -2098,9 +2153,14 @@ clear_image(struct tu_cmd_buffer *cmd,
uint32_t level_count = tu_get_levelCount(image, range);
uint32_t layer_count = tu_get_layerCount(image, range);
struct tu_cs *cs = &cmd->cs;
- VkFormat format = image->vk_format;
- if (format == VK_FORMAT_D32_SFLOAT_S8_UINT || format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
- format = copy_format(format, aspect_mask, false);
+ enum pipe_format format;
+ if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
+ format = PIPE_FORMAT_R32_UINT;
+ } else {
+ format = tu6_plane_format(image->vk_format,
+ tu6_plane_index(image->vk_format,
+ aspect_mask));
+ }
if (image->layout[0].depth0 > 1) {
assert(layer_count == 1);
@@ -2112,7 +2172,7 @@ clear_image(struct tu_cmd_buffer *cmd,
ops->setup(cmd, cs, format, aspect_mask, 0, true, image->layout[0].ubwc,
image->layout[0].nr_samples);
if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
- ops->clear_value(cs, VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, clear_value);
+ ops->clear_value(cs, PIPE_FORMAT_R9G9B9E5_FLOAT, clear_value);
else
ops->clear_value(cs, format, clear_value);
@@ -2125,7 +2185,7 @@ clear_image(struct tu_cmd_buffer *cmd,
u_minify(image->layout[0].height0, range->baseMipLevel + j)
});
- struct tu_image_view dst;
+ struct fdl6_view dst;
tu_image_view_copy_blit(&dst, image, format, &(VkImageSubresourceLayers) {
.aspectMask = aspect_mask,
.mipLevel = range->baseMipLevel + j,
@@ -2338,21 +2398,21 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
}
static void
-pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t clear_value[4])
+pack_gmem_clear_value(const VkClearValue *val, enum pipe_format format, uint32_t clear_value[4])
{
switch (format) {
- case VK_FORMAT_X8_D24_UNORM_PACK32:
- case VK_FORMAT_D24_UNORM_S8_UINT:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24) |
val->depthStencil.stencil << 24;
return;
- case VK_FORMAT_D16_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 16);
return;
- case VK_FORMAT_D32_SFLOAT:
+ case PIPE_FORMAT_Z32_FLOAT:
clear_value[0] = fui(val->depthStencil.depth);
return;
- case VK_FORMAT_S8_UINT:
+ case PIPE_FORMAT_S8_UINT:
clear_value[0] = val->depthStencil.stencil;
return;
default:
@@ -2361,33 +2421,33 @@ pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t clear_v
float tmp[4];
memcpy(tmp, val->color.float32, 4 * sizeof(float));
- if (vk_format_is_srgb(format)) {
+ if (util_format_is_srgb(format)) {
for (int i = 0; i < 3; i++)
tmp[i] = util_format_linear_to_srgb_float(tmp[i]);
}
#define PACK_F(type) util_format_##type##_pack_rgba_float \
( (uint8_t*) &clear_value[0], 0, tmp, 0, 1, 1)
- switch (vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) {
+ switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) {
case 4:
PACK_F(r4g4b4a4_unorm);
break;
case 5:
- if (vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_Y) == 6)
+ if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_Y) == 6)
PACK_F(r5g6b5_unorm);
else
PACK_F(r5g5b5a1_unorm);
break;
case 8:
- if (vk_format_is_snorm(format))
+ if (util_format_is_snorm(format))
PACK_F(r8g8b8a8_snorm);
- else if (vk_format_is_unorm(format))
+ else if (util_format_is_unorm(format))
PACK_F(r8g8b8a8_unorm);
else
pack_int8(clear_value, val->color.uint32);
break;
case 10:
- if (vk_format_is_int(format))
+ if (util_format_is_pure_integer(format))
pack_int10_2(clear_value, val->color.uint32);
else
PACK_F(r10g10b10a2_unorm);
@@ -2396,11 +2456,11 @@ pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t clear_v
clear_value[0] = float3_to_r11g11b10f(val->color.float32);
break;
case 16:
- if (vk_format_is_snorm(format))
+ if (util_format_is_snorm(format))
PACK_F(r16g16b16a16_snorm);
- else if (vk_format_is_unorm(format))
+ else if (util_format_is_unorm(format))
PACK_F(r16g16b16a16_unorm);
- else if (vk_format_is_float(format))
+ else if (util_format_is_float(format))
PACK_F(r16g16b16a16_float);
else
pack_int16(clear_value, val->color.uint32);
@@ -2417,7 +2477,7 @@ pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t clear_v
static void
clear_gmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
- VkFormat format,
+ enum pipe_format format,
uint8_t clear_mask,
uint32_t gmem_offset,
const VkClearValue *value)
@@ -2454,15 +2514,16 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
trace_start_gmem_clear(&cmd->trace, cs);
+ enum pipe_format format = tu_vk_format_to_pipe_format(att->format);
if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
if (mask & VK_IMAGE_ASPECT_DEPTH_BIT)
- clear_gmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, 0xf, att->gmem_offset, value);
+ clear_gmem_attachment(cmd, cs, PIPE_FORMAT_Z32_FLOAT, 0xf, att->gmem_offset, value);
if (mask & VK_IMAGE_ASPECT_STENCIL_BIT)
- clear_gmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, 0xf, att->gmem_offset_stencil, value);
+ clear_gmem_attachment(cmd, cs, PIPE_FORMAT_S8_UINT, 0xf, att->gmem_offset_stencil, value);
return;
}
- clear_gmem_attachment(cmd, cs, att->format, aspect_write_mask(att->format, mask), att->gmem_offset, value);
+ clear_gmem_attachment(cmd, cs, format, aspect_write_mask(format, mask), att->gmem_offset, value);
trace_end_gmem_clear(&cmd->trace, cs, att->format, att->samples);
}
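
The hunk above, like the rest of the file, obtains its pipe format from tu_vk_format_to_pipe_format(). For the depth/stencil formats touched by this patch, the substitutions in pack_gmem_clear_value() and tu_store_gmem_attachment() imply the mapping sketched below; ds_vk_format_to_pipe_format() is a hypothetical, partial stand-in, not the real helper.

/* Partial VkFormat -> pipe_format mapping inferred from the substitutions
 * in this patch (illustration only).
 */
static enum pipe_format
ds_vk_format_to_pipe_format(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_X8_D24_UNORM_PACK32: return PIPE_FORMAT_Z24X8_UNORM;
   case VK_FORMAT_D24_UNORM_S8_UINT:   return PIPE_FORMAT_Z24_UNORM_S8_UINT;
   case VK_FORMAT_D16_UNORM:           return PIPE_FORMAT_Z16_UNORM;
   case VK_FORMAT_D32_SFLOAT:          return PIPE_FORMAT_Z32_FLOAT;
   case VK_FORMAT_S8_UINT:             return PIPE_FORMAT_S8_UINT;
   case VK_FORMAT_D32_SFLOAT_S8_UINT:  return PIPE_FORMAT_Z32_FLOAT_S8X24_UINT;
   default:                            unreachable("unhandled format");
   }
}
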
@@ -2554,12 +2615,13 @@ tu_CmdClearAttachments(VkCommandBuffer commandBuffer,
static void
clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
- VkFormat format,
+ VkFormat vk_format,
VkImageAspectFlags clear_mask,
const VkRenderPassBeginInfo *info,
uint32_t a,
- bool separate_stencil)
+ bool separate_ds)
{
+ enum pipe_format format = tu_vk_format_to_pipe_format(vk_format);
const struct tu_framebuffer *fb = cmd->state.framebuffer;
const struct tu_image_view *iview = cmd->state.attachments[a];
const uint32_t clear_views = cmd->state.pass->attachments[a].clear_views;
@@ -2569,19 +2631,20 @@ clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
trace_start_sysmem_clear(&cmd->trace, cs);
- ops->setup(cmd, cs, format, clear_mask, 0, true, iview->ubwc_enabled,
+ ops->setup(cmd, cs, format, clear_mask, 0, true, iview->view.ubwc_enabled,
cmd->state.pass->attachments[a].samples);
ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
ops->clear_value(cs, format, &info->pClearValues[a]);
for_each_layer(i, clear_views, fb->layers) {
- if (separate_stencil) {
- if (ops == &r3d_ops)
- r3d_dst_stencil(cs, iview, i);
- else
- r2d_dst_stencil(cs, iview, i);
+ if (separate_ds) {
+ if (vk_format == VK_FORMAT_D32_SFLOAT) {
+ ops->dst_depth(cs, iview, i);
+ } else {
+ ops->dst_stencil(cs, iview, i);
+ }
} else {
- ops->dst(cs, iview, i);
+ ops->dst(cs, &iview->view, i);
}
ops->run(cmd, cs);
}
@@ -2589,7 +2652,7 @@ clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
ops->teardown(cmd, cs);
trace_end_sysmem_clear(&cmd->trace, cs,
- format, ops == &r3d_ops,
+ vk_format, ops == &r3d_ops,
cmd->state.pass->attachments[a].samples);
}
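
The separate_ds handling above boils down to a per-layer destination dispatch: a combined D32_SFLOAT_S8_UINT attachment is cleared one plane at a time, while every other format uses the regular view. A condensed sketch follows; emit_clear_dst() and the blit_ops struct name are assumptions made for illustration, but the dst/dst_depth/dst_stencil hooks and their call shapes come from the hunk itself.

/* Illustrative per-layer destination selection for a sysmem clear. */
static void
emit_clear_dst(const struct blit_ops *ops, struct tu_cs *cs,
               const struct tu_image_view *iview,
               VkFormat vk_format, bool separate_ds, uint32_t layer)
{
   if (separate_ds) {
      /* clearing one plane of a combined depth/stencil attachment */
      if (vk_format == VK_FORMAT_D32_SFLOAT)
         ops->dst_depth(cs, iview, layer);
      else
         ops->dst_stencil(cs, iview, layer);
   } else {
      ops->dst(cs, &iview->view, layer);
   }
}
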
@@ -2608,7 +2671,7 @@ tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
if (attachment->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
clear_sysmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT,
- info, a, false);
+ info, a, true);
}
if (attachment->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
clear_sysmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, VK_IMAGE_ASPECT_COLOR_BIT,
@@ -2630,6 +2693,7 @@ tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
*/
if (vk_format_is_depth_or_stencil(attachment->format)) {
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS);
tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH);
} else {
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
@@ -2672,23 +2736,35 @@ tu_emit_blit(struct tu_cmd_buffer *cmd,
tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(
.unk0 = !resolve,
.gmem = !resolve,
- .sample_0 = vk_format_is_int(attachment->format) |
+ .sample_0 = vk_format_is_int(attachment->format) ||
vk_format_is_depth_or_stencil(attachment->format)));
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
- if (separate_stencil) {
- tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS);
- tu_cs_emit_qw(cs, iview->stencil_base_addr);
- tu_cs_emit(cs, iview->stencil_PITCH);
+ if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+ if (!separate_stencil) {
+ tu_cs_emit(cs, tu_image_view_depth(iview, RB_BLIT_DST_INFO));
+ tu_cs_emit_qw(cs, iview->depth_base_addr);
+ tu_cs_emit(cs, iview->depth_PITCH);
- tu_cs_emit_regs(cs,
- A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset_stencil));
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
+ tu_cs_image_flag_ref(cs, &iview->view, 0);
+
+ tu_cs_emit_regs(cs,
+ A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset));
+ } else {
+ tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS);
+ tu_cs_emit_qw(cs, iview->stencil_base_addr);
+ tu_cs_emit(cs, iview->stencil_PITCH);
+
+ tu_cs_emit_regs(cs,
+ A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset_stencil));
+ }
} else {
- tu_cs_emit(cs, iview->RB_BLIT_DST_INFO);
- tu_cs_image_ref_2d(cs, iview, 0, false);
+ tu_cs_emit(cs, iview->view.RB_BLIT_DST_INFO);
+ tu_cs_image_ref_2d(cs, &iview->view, 0, false);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
- tu_cs_image_flag_ref(cs, iview, 0);
+ tu_cs_image_flag_ref(cs, &iview->view, 0);
tu_cs_emit_regs(cs,
A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset));
@@ -2759,25 +2835,31 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
const struct tu_image_view *iview,
uint32_t samples,
bool separate_stencil,
- VkFormat format,
+ enum pipe_format format,
uint32_t gmem_offset,
uint32_t cpp)
{
r2d_setup_common(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false,
- iview->ubwc_enabled, true);
- if (separate_stencil)
- r2d_dst_stencil(cs, iview, 0);
- else
- r2d_dst(cs, iview, 0);
+ iview->view.ubwc_enabled, true);
+
+ if (iview->image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+ if (!separate_stencil) {
+ r2d_dst_depth(cs, iview, 0);
+ } else {
+ r2d_dst_stencil(cs, iview, 0);
+ }
+ } else {
+ r2d_dst(cs, &iview->view, 0);
+ }
tu_cs_emit_regs(cs,
A6XX_SP_PS_2D_SRC_INFO(
.color_format = tu6_format_texture(format, TILE6_2).fmt,
.tile_mode = TILE6_2,
- .srgb = vk_format_is_srgb(format),
+ .srgb = util_format_is_srgb(format),
.samples = tu_msaa_samples(samples),
- .samples_average = !vk_format_is_int(format) &&
- !vk_format_is_depth_or_stencil(format),
+ .samples_average = !util_format_is_pure_integer(format) &&
+ !util_format_is_depth_or_stencil(format),
.unk20 = 1,
.unk22 = 1),
/* note: src size does not matter when not scaling */
@@ -2807,26 +2889,45 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
const struct tu_image_view *iview,
uint32_t dst_samples,
bool separate_stencil,
- VkFormat format,
+ enum pipe_format format,
const VkRect2D *render_area,
uint32_t gmem_offset,
uint32_t cpp)
{
+   /* RB_BIN_CONTROL/GRAS_BIN_CONTROL are normally set only once, and not
+    * until we know whether we're HW binning or not.  We want to avoid
+    * depending on that here, so that in the future we can store attachments
+    * before the end of the renderpass.  Use the scratch space to
+    * save/restore them dynamically.
+    */
+ tu_cs_emit_pkt7(cs, CP_REG_TO_SCRATCH, 1);
+ tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(REG_A6XX_RB_BIN_CONTROL) |
+ CP_REG_TO_SCRATCH_0_SCRATCH(0) |
+ CP_REG_TO_SCRATCH_0_CNT(1 - 1));
+
r3d_setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false,
- iview->ubwc_enabled, dst_samples);
+ iview->view.ubwc_enabled, dst_samples);
r3d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
- if (separate_stencil)
- r3d_dst_stencil(cs, iview, 0);
- else
- r3d_dst(cs, iview, 0);
+ if (iview->image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+ if (!separate_stencil) {
+ r3d_dst_depth(cs, iview, 0);
+ } else {
+ r3d_dst_stencil(cs, iview, 0);
+ }
+ } else {
+ r3d_dst(cs, &iview->view, 0);
+ }
r3d_src_gmem(cmd, cs, iview, format, gmem_offset, cpp);
/* sync GMEM writes with CACHE. */
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
+ /* Wait for CACHE_INVALIDATE to land */
+ tu_cs_emit_wfi(cs);
+
r3d_run(cmd, cs);
/* Draws write to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
@@ -2835,6 +2936,17 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
* writes to depth images as a color RT, so there's no need to flush depth.
*/
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
+
+ /* Restore RB_BIN_CONTROL/GRAS_BIN_CONTROL saved above. */
+ tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
+ tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_RB_BIN_CONTROL) |
+ CP_SCRATCH_TO_REG_0_SCRATCH(0) |
+ CP_SCRATCH_TO_REG_0_CNT(1 - 1));
+
+ tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
+ tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_GRAS_BIN_CONTROL) |
+ CP_SCRATCH_TO_REG_0_SCRATCH(0) |
+ CP_SCRATCH_TO_REG_0_CNT(1 - 1));
}
void
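
The save/restore added around the 3D store follows a generic CP scratch pattern: copy a register into a scratch slot before reprogramming it, then copy it back afterwards. A minimal sketch of that pattern is below; save_reg_to_scratch()/restore_reg_from_scratch() are hypothetical helpers, while the packet opcodes and field macros are the ones used in the hunk itself.

/* Hypothetical helpers (illustration only) for the CP scratch save/restore
 * pattern.  The CNT field is encoded as count-1, hence "1 - 1" for a single
 * register.
 */
static void
save_reg_to_scratch(struct tu_cs *cs, uint32_t reg, uint32_t scratch)
{
   tu_cs_emit_pkt7(cs, CP_REG_TO_SCRATCH, 1);
   tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(reg) |
                  CP_REG_TO_SCRATCH_0_SCRATCH(scratch) |
                  CP_REG_TO_SCRATCH_0_CNT(1 - 1));
}

static void
restore_reg_from_scratch(struct tu_cs *cs, uint32_t reg, uint32_t scratch)
{
   tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
   tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(reg) |
                  CP_SCRATCH_TO_REG_0_SCRATCH(scratch) |
                  CP_SCRATCH_TO_REG_0_CNT(1 - 1));
}

Matching the hunk, the store saves only RB_BIN_CONTROL into scratch slot 0 and later restores that same word into both RB_BIN_CONTROL and GRAS_BIN_CONTROL.
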
@@ -2862,13 +2974,17 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
* required y padding in the layout (except for the last level)
*/
bool need_y2_align =
- y2 != iview->extent.height || iview->need_y2_align;
+ y2 != iview->view.height || iview->view.need_y2_align;
bool unaligned =
x1 % phys_dev->info->gmem_align_w ||
- (x2 % phys_dev->info->gmem_align_w && x2 != iview->extent.width) ||
+ (x2 % phys_dev->info->gmem_align_w && x2 != iview->view.width) ||
y1 % phys_dev->info->gmem_align_h || (y2 % phys_dev->info->gmem_align_h && need_y2_align);
+   /* Unaligned stores are incredibly rare in CTS, so force one here to make
+    * sure this path gets tested. */
+ if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_UNALIGNED_STORE))
+ unaligned = true;
+
    /* D32_SFLOAT_S8_UINT is quite a special format: it has two planes,
     * one for depth and the other for stencil.  When resolving an MSAA
     * D32_SFLOAT_S8_UINT to S8_UINT, we need to take that into account.
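
The "unaligned" test in the hunk above can be read as a pair of per-axis edge checks: start edges must sit on the GMEM alignment granularity, and end edges must either be aligned or coincide with the image extent (with the bottom edge additionally gated by need_y2_align). An equivalent restatement as a self-contained sketch; render_area_unaligned(), image_w and gmem_align_w/h are stand-in names for the values used above.

#include <stdbool.h>
#include <stdint.h>

/* Same logic as the in-tree condition, just factored into named checks. */
static bool
render_area_unaligned(uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2,
                      uint32_t image_w, uint32_t gmem_align_w,
                      uint32_t gmem_align_h, bool need_y2_align)
{
   bool x_start_ok = (x1 % gmem_align_w) == 0;
   bool x_end_ok   = (x2 % gmem_align_w) == 0 || x2 == image_w;
   bool y_start_ok = (y1 % gmem_align_h) == 0;
   bool y_end_ok   = (y2 % gmem_align_h) == 0 || !need_y2_align;

   return !(x_start_ok && x_end_ok && y_start_ok && y_end_ok);
}
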
@@ -2877,22 +2993,25 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
src->format == VK_FORMAT_D32_SFLOAT_S8_UINT &&
dst->format == VK_FORMAT_S8_UINT;
+ bool store_common = dst->store && !resolve_d32s8_s8;
+ bool store_separate_stencil = dst->store_stencil || resolve_d32s8_s8;
+
trace_start_gmem_store(&cmd->trace, cs);
/* use fast path when render area is aligned, except for unsupported resolve cases */
if (!unaligned && (a == gmem_a || blit_can_resolve(dst->format))) {
- if (dst->store)
- tu_emit_blit(cmd, cs, iview, src, true, resolve_d32s8_s8);
- if (dst->store_stencil)
+ if (store_common)
+ tu_emit_blit(cmd, cs, iview, src, true, false);
+ if (store_separate_stencil)
tu_emit_blit(cmd, cs, iview, src, true, true);
trace_end_gmem_store(&cmd->trace, cs, dst->format, true, false);
return;
}
- VkFormat format = src->format;
- if (format == VK_FORMAT_D32_SFLOAT_S8_UINT)
- format = VK_FORMAT_D32_SFLOAT;
+ enum pipe_format format = tu_vk_format_to_pipe_format(src->format);
+ if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+ format = PIPE_FORMAT_Z32_FLOAT;
if (dst->samples > 1) {
/* If we hit this path, we have to disable draw states after every tile
@@ -2902,26 +3021,26 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
* TODO: store a flag somewhere so we don't do this more than once and
* don't do it after the renderpass when this happens.
*/
- if (dst->store || dst->store_stencil)
+ if (store_common || store_separate_stencil)
tu_disable_draw_states(cmd, cs);
- if (dst->store) {
- store_3d_blit(cmd, cs, iview, dst->samples, resolve_d32s8_s8, format,
+ if (store_common) {
+ store_3d_blit(cmd, cs, iview, dst->samples, false, format,
render_area, src->gmem_offset, src->cpp);
}
- if (dst->store_stencil) {
- store_3d_blit(cmd, cs, iview, dst->samples, true, VK_FORMAT_S8_UINT,
- render_area, src->gmem_offset, src->samples);
+ if (store_separate_stencil) {
+ store_3d_blit(cmd, cs, iview, dst->samples, true, PIPE_FORMAT_S8_UINT,
+ render_area, src->gmem_offset_stencil, src->samples);
}
} else {
r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
- if (dst->store) {
- store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format,
+ if (store_common) {
+ store_cp_blit(cmd, cs, iview, src->samples, false, format,
src->gmem_offset, src->cpp);
}
- if (dst->store_stencil) {
- store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT,
+ if (store_separate_stencil) {
+ store_cp_blit(cmd, cs, iview, src->samples, true, PIPE_FORMAT_S8_UINT,
src->gmem_offset_stencil, src->samples);
}
}
diff --git a/lib/mesa/src/freedreno/vulkan/tu_legacy.c b/lib/mesa/src/freedreno/vulkan/tu_legacy.c
deleted file mode 100644
index 8209a96b0..000000000
--- a/lib/mesa/src/freedreno/vulkan/tu_legacy.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright 2020 Valve Corporation
- * SPDX-License-Identifier: MIT
- *
- * Authors:
- * Jonathan Marek <jonathan@marek.ca>
- */
-
-#include <vulkan/vulkan.h>
-#include <vulkan/vk_android_native_buffer.h> /* android tu_entrypoints.h depends on this */
-#include <assert.h>
-
-#include "tu_entrypoints.h"
-#include "vk_util.h"
-
-void
-tu_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice pdev,
- uint32_t *count,
- VkQueueFamilyProperties *props)
-{
- if (!props)
- return tu_GetPhysicalDeviceQueueFamilyProperties2(pdev, count, NULL);
-
- VkQueueFamilyProperties2 props2[*count];
- for (uint32_t i = 0; i < *count; i++) {
- props2[i].sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2;
- props2[i].pNext = NULL;
- }
- tu_GetPhysicalDeviceQueueFamilyProperties2(pdev, count, props2);
- for (uint32_t i = 0; i < *count; i++)
- props[i] = props2[i].queueFamilyProperties;
-}
-
-void
-tu_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice pdev,
- VkFormat format,
- VkImageType type,
- VkSampleCountFlagBits samples,
- VkImageUsageFlags usage,
- VkImageTiling tiling,
- uint32_t *count,
- VkSparseImageFormatProperties *props)
-{
- const VkPhysicalDeviceSparseImageFormatInfo2 info = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
- .format = format,
- .type = type,
- .samples = samples,
- .usage = usage,
- .tiling = tiling,
- };
-
- if (!props)
- return tu_GetPhysicalDeviceSparseImageFormatProperties2(pdev, &info, count, NULL);
-
- VkSparseImageFormatProperties2 props2[*count];
- for (uint32_t i = 0; i < *count; i++) {
- props2[i].sType = VK_STRUCTURE_TYPE_SPARSE_IMAGE_FORMAT_PROPERTIES_2;
- props2[i].pNext = NULL;
- }
- tu_GetPhysicalDeviceSparseImageFormatProperties2(pdev, &info, count, props2);
- for (uint32_t i = 0; i < *count; i++)
- props[i] = props2[i].properties;
-}
-
-void
-tu_GetImageSparseMemoryRequirements(VkDevice device,
- VkImage image,
- uint32_t *count,
- VkSparseImageMemoryRequirements *reqs)
-{
- const VkImageSparseMemoryRequirementsInfo2 info = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_SPARSE_MEMORY_REQUIREMENTS_INFO_2,
- .image = image
- };
-
- if (!reqs)
- return tu_GetImageSparseMemoryRequirements2(device, &info, count, NULL);
-
- VkSparseImageMemoryRequirements2 reqs2[*count];
- for (uint32_t i = 0; i < *count; i++) {
- reqs2[i].sType = VK_STRUCTURE_TYPE_SPARSE_IMAGE_MEMORY_REQUIREMENTS_2;
- reqs2[i].pNext = NULL;
- }
- tu_GetImageSparseMemoryRequirements2(device, &info, count, reqs2);
- for (uint32_t i = 0; i < *count; i++)
- reqs[i] = reqs2[i].memoryRequirements;
-}
diff --git a/lib/mesa/src/freedreno/vulkan/tu_wsi_display.c b/lib/mesa/src/freedreno/vulkan/tu_wsi_display.c
deleted file mode 100644
index 9a9696d93..000000000
--- a/lib/mesa/src/freedreno/vulkan/tu_wsi_display.c
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * Copyright © 2017 Keith Packard
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that copyright
- * notice and this permission notice appear in supporting documentation, and
- * that the name of the copyright holders not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. The copyright holders make no representations
- * about the suitability of this software for any purpose. It is provided "as
- * is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
- * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
- * OF THIS SOFTWARE.
- */
-
-#include <stdbool.h>
-#include <string.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include "tu_private.h"
-#include "tu_cs.h"
-#include "util/disk_cache.h"
-#include "util/strtod.h"
-#include "vk_util.h"
-#include <xf86drm.h>
-#include <xf86drmMode.h>
-#include "vk_format.h"
-#include "util/debug.h"
-#include "wsi_common_display.h"
-
-VkResult
-tu_GetPhysicalDeviceDisplayPropertiesKHR(VkPhysicalDevice physical_device,
- uint32_t *property_count,
- VkDisplayPropertiesKHR *properties)
-{
- TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device);
-
- return wsi_display_get_physical_device_display_properties(
- physical_device,
- &pdevice->wsi_device,
- property_count,
- properties);
-}
-
-VkResult
-tu_GetPhysicalDeviceDisplayProperties2KHR(VkPhysicalDevice physical_device,
- uint32_t *property_count,
- VkDisplayProperties2KHR *properties)
-{
- TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device);
-
- return wsi_display_get_physical_device_display_properties2(
- physical_device,
- &pdevice->wsi_device,
- property_count,
- properties);
-}
-
-VkResult
-tu_GetPhysicalDeviceDisplayPlanePropertiesKHR(
- VkPhysicalDevice physical_device,
- uint32_t *property_count,
- VkDisplayPlanePropertiesKHR *properties)
-{
- TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device);
-
- return wsi_display_get_physical_device_display_plane_properties(
- physical_device,
- &pdevice->wsi_device,
- property_count,
- properties);
-}
-
-VkResult
-tu_GetPhysicalDeviceDisplayPlaneProperties2KHR(
- VkPhysicalDevice physical_device,
- uint32_t *property_count,
- VkDisplayPlaneProperties2KHR *properties)
-{
- TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device);
-
- return wsi_display_get_physical_device_display_plane_properties2(
- physical_device,
- &pdevice->wsi_device,
- property_count,
- properties);
-}
-
-VkResult
-tu_GetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physical_device,
- uint32_t plane_index,
- uint32_t *display_count,
- VkDisplayKHR *displays)
-{
- TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device);
-
- return wsi_display_get_display_plane_supported_displays(
- physical_device,
- &pdevice->wsi_device,
- plane_index,
- display_count,
- displays);
-}
-
-
-VkResult
-tu_GetDisplayModePropertiesKHR(VkPhysicalDevice physical_device,
- VkDisplayKHR display,
- uint32_t *property_count,
- VkDisplayModePropertiesKHR *properties)
-{
- TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device);
-
- return wsi_display_get_display_mode_properties(physical_device,
- &pdevice->wsi_device,
- display,
- property_count,
- properties);
-}
-
-VkResult
-tu_GetDisplayModeProperties2KHR(VkPhysicalDevice physical_device,
- VkDisplayKHR display,
- uint32_t *property_count,
- VkDisplayModeProperties2KHR *properties)
-{
- TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device);
-
- return wsi_display_get_display_mode_properties2(physical_device,
- &pdevice->wsi_device,
- display,
- property_count,
- properties);
-}
-
-VkResult
-tu_CreateDisplayModeKHR(VkPhysicalDevice physical_device,
- VkDisplayKHR display,
- const VkDisplayModeCreateInfoKHR *create_info,
- const VkAllocationCallbacks *allocator,
- VkDisplayModeKHR *mode)
-{
- TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device);
-
- return wsi_display_create_display_mode(physical_device,
- &pdevice->wsi_device,
- display,
- create_info,
- allocator,
- mode);
-}
-
-VkResult
-tu_GetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physical_device,
- VkDisplayModeKHR mode_khr,
- uint32_t plane_index,
- VkDisplayPlaneCapabilitiesKHR *capabilities)
-{
- TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device);
-
- return wsi_get_display_plane_capabilities(physical_device,
- &pdevice->wsi_device,
- mode_khr,
- plane_index,
- capabilities);
-}
-
-VkResult
-tu_GetDisplayPlaneCapabilities2KHR(VkPhysicalDevice physical_device,
- const VkDisplayPlaneInfo2KHR *pDisplayPlaneInfo,
- VkDisplayPlaneCapabilities2KHR *capabilities)
-{
- TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device);
-
- return wsi_get_display_plane_capabilities2(physical_device,
- &pdevice->wsi_device,
- pDisplayPlaneInfo,
- capabilities);
-}
-
-VkResult
-tu_CreateDisplayPlaneSurfaceKHR(
- VkInstance _instance,
- const VkDisplaySurfaceCreateInfoKHR *create_info,
- const VkAllocationCallbacks *allocator,
- VkSurfaceKHR *surface)
-{
- TU_FROM_HANDLE(tu_instance, instance, _instance);
- const VkAllocationCallbacks *alloc;
-
- if (allocator)
- alloc = allocator;
- else
- alloc = &instance->alloc;
-
- return wsi_create_display_surface(_instance, alloc,
- create_info, surface);
-}
-
-VkResult
-tu_ReleaseDisplayEXT(VkPhysicalDevice physical_device,
- VkDisplayKHR display)
-{
- TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device);
-
- return wsi_release_display(physical_device,
- &pdevice->wsi_device,
- display);
-}
-
-#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
-VkResult
-tu_AcquireXlibDisplayEXT(VkPhysicalDevice physical_device,
- Display *dpy,
- VkDisplayKHR display)
-{
- TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device);
-
- return wsi_acquire_xlib_display(physical_device,
- &pdevice->wsi_device,
- dpy,
- display);
-}
-
-VkResult
-tu_GetRandROutputDisplayEXT(VkPhysicalDevice physical_device,
- Display *dpy,
- RROutput output,
- VkDisplayKHR *display)
-{
- TU_FROM_HANDLE(tu_physical_device, pdevice, physical_device);
-
- return wsi_get_randr_output_display(physical_device,
- &pdevice->wsi_device,
- dpy,
- output,
- display);
-}
-#endif /* VK_USE_PLATFORM_XLIB_XRANDR_EXT */
-
-/* VK_EXT_display_control */
-
-VkResult
-tu_DisplayPowerControlEXT(VkDevice _device,
- VkDisplayKHR display,
- const VkDisplayPowerInfoEXT *display_power_info)
-{
- TU_FROM_HANDLE(tu_device, device, _device);
-
- return wsi_display_power_control(_device,
- &device->physical_device->wsi_device,
- display,
- display_power_info);
-}
-
-VkResult
-tu_RegisterDeviceEventEXT(VkDevice _device,
- const VkDeviceEventInfoEXT *device_event_info,
- const VkAllocationCallbacks *allocator,
- VkFence *_fence)
-{
- TU_FROM_HANDLE(tu_device, device, _device);
- struct tu_fence *fence;
- VkResult ret;
-
- fence = vk_alloc2(&device->instance->alloc, allocator, sizeof (*fence),
- 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!fence)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- tu_fence_init(fence, false);
-
- ret = wsi_register_device_event(_device,
- &device->physical_device->wsi_device,
- device_event_info,
- allocator,
- &fence->fence_wsi);
- if (ret == VK_SUCCESS)
- *_fence = tu_fence_to_handle(fence);
- else
- vk_free2(&device->instance->alloc, allocator, fence);
- return ret;
-}
-
-VkResult
-tu_RegisterDisplayEventEXT(VkDevice _device,
- VkDisplayKHR display,
- const VkDisplayEventInfoEXT *display_event_info,
- const VkAllocationCallbacks *allocator,
- VkFence *_fence)
-{
- TU_FROM_HANDLE(tu_device, device, _device);
-
- struct tu_fence *fence;
- VkResult ret;
-
- fence = vk_alloc2(&device->instance->alloc, allocator, sizeof (*fence),
- 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!fence)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- tu_fence_init(fence, false);
-
- ret = wsi_register_display_event(_device,
- &device->physical_device->wsi_device,
- display,
- display_event_info,
- allocator,
- &fence->fence_wsi);
-
- if (ret == VK_SUCCESS)
- *_fence = tu_fence_to_handle(fence);
- else
- vk_free2(&device->instance->alloc, allocator, fence);
- return ret;
-}
-
-VkResult
-tu_GetSwapchainCounterEXT(VkDevice _device,
- VkSwapchainKHR swapchain,
- VkSurfaceCounterFlagBitsEXT flag_bits,
- uint64_t *value)
-{
- TU_FROM_HANDLE(tu_device, device, _device);
-
- return wsi_get_swapchain_counter(_device,
- &device->physical_device->wsi_device,
- swapchain,
- flag_bits,
- value);
-}
-